diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go index 1d4d85a5..b9a38272 100644 --- a/internal/runtime/executor/kiro_executor.go +++ b/internal/runtime/executor/kiro_executor.go @@ -19,6 +19,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude" kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" + kiroopenai "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -161,6 +162,22 @@ type KiroExecutor struct { refreshMu sync.Mutex // Serializes token refresh operations to prevent race conditions } +// buildKiroPayloadForFormat builds the Kiro API payload based on the source format. +// This is critical because OpenAI and Claude formats have different tool structures: +// - OpenAI: tools[].function.name, tools[].function.description +// - Claude: tools[].name, tools[].description +func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) []byte { + switch sourceFormat.String() { + case "openai": + log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String()) + return kiroopenai.BuildKiroPayloadFromOpenAI(body, modelID, profileArn, origin, isAgentic, isChatOnly) + default: + // Default to Claude format (also handles "claude", "kiro", etc.) + log.Debugf("kiro: using Claude payload builder for source format: %s", sourceFormat.String()) + return kiroclaude.BuildKiroPayload(body, modelID, profileArn, origin, isAgentic, isChatOnly) + } +} + // NewKiroExecutor creates a new Kiro executor instance. 
func NewKiroExecutor(cfg *config.Config) *KiroExecutor { return &KiroExecutor{cfg: cfg} @@ -231,7 +248,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) @@ -341,7 +358,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) // Rebuild payload with new profile ARN if changed - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Infof("kiro: token refreshed successfully, retrying request") continue } @@ -398,7 +415,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. 
if refreshedAuth != nil { auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Infof("kiro: token refreshed for 403, retrying request") continue } @@ -537,7 +554,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) @@ -660,7 +677,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) // Rebuild payload with new profile ARN if changed - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Infof("kiro: token refreshed successfully, retrying stream request") continue } @@ -717,7 +734,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox if refreshedAuth != nil { auth = refreshedAuth accessToken, profileArn = kiroCredentials(auth) - kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from) log.Infof("kiro: token 
refreshed for 403, retrying stream request") continue } @@ -755,7 +772,20 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox } }() - e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter) + // Check if thinking mode was enabled in the original request + // Only parse tags when thinking was explicitly requested + // Check multiple sources: original request, pre-translation payload, and translated body + // This handles different client formats (Claude API, OpenAI, AMP/Cursor) + thinkingEnabled := kiroclaude.IsThinkingEnabled(opts.OriginalRequest) + if !thinkingEnabled { + thinkingEnabled = kiroclaude.IsThinkingEnabled(req.Payload) + } + if !thinkingEnabled { + thinkingEnabled = kiroclaude.IsThinkingEnabled(body) + } + log.Debugf("kiro: stream thinkingEnabled = %v", thinkingEnabled) + + e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter, thinkingEnabled) }(httpResp) return out, nil @@ -807,6 +837,153 @@ func kiroCredentials(auth *cliproxyauth.Auth) (accessToken, profileArn string) { return accessToken, profileArn } +// findRealThinkingEndTag finds the real end tag, skipping false positives. +// Returns -1 if no real end tag is found. 
+// +// Real tags from Kiro API have specific characteristics: +// - Usually preceded by newline (.\n) +// - Usually followed by newline (\n\n) +// - Not inside code blocks or inline code +// +// False positives (discussion text) have characteristics: +// - In the middle of a sentence +// - Preceded by discussion words like "标签", "tag", "returns" +// - Inside code blocks or inline code +// +// Parameters: +// - content: the content to search in +// - alreadyInCodeBlock: whether we're already inside a code block from previous chunks +// - alreadyInInlineCode: whether we're already inside inline code from previous chunks +func findRealThinkingEndTag(content string, alreadyInCodeBlock, alreadyInInlineCode bool) int { + searchStart := 0 + for { + endIdx := strings.Index(content[searchStart:], kirocommon.ThinkingEndTag) + if endIdx < 0 { + return -1 + } + endIdx += searchStart // Adjust to absolute position + + textBeforeEnd := content[:endIdx] + textAfterEnd := content[endIdx+len(kirocommon.ThinkingEndTag):] + + // Check 1: Is it inside inline code? + // Count backticks in current content and add state from previous chunks + backtickCount := strings.Count(textBeforeEnd, "`") + effectiveInInlineCode := alreadyInInlineCode + if backtickCount%2 == 1 { + effectiveInInlineCode = !effectiveInInlineCode + } + if effectiveInInlineCode { + log.Debugf("kiro: found inside inline code at pos %d, skipping", endIdx) + searchStart = endIdx + len(kirocommon.ThinkingEndTag) + continue + } + + // Check 2: Is it inside a code block? 
+ // Count fences in current content and add state from previous chunks + fenceCount := strings.Count(textBeforeEnd, "```") + altFenceCount := strings.Count(textBeforeEnd, "~~~") + effectiveInCodeBlock := alreadyInCodeBlock + if fenceCount%2 == 1 || altFenceCount%2 == 1 { + effectiveInCodeBlock = !effectiveInCodeBlock + } + if effectiveInCodeBlock { + log.Debugf("kiro: found inside code block at pos %d, skipping", endIdx) + searchStart = endIdx + len(kirocommon.ThinkingEndTag) + continue + } + + // Check 3: Real tags are usually preceded by newline or at start + // and followed by newline or at end. Check the format. + charBeforeTag := byte(0) + if endIdx > 0 { + charBeforeTag = content[endIdx-1] + } + charAfterTag := byte(0) + if len(textAfterEnd) > 0 { + charAfterTag = textAfterEnd[0] + } + + // Real end tag format: preceded by newline OR end of sentence (. ! ?) + // and followed by newline OR end of content + isPrecededByNewlineOrSentenceEnd := charBeforeTag == '\n' || charBeforeTag == '.' || + charBeforeTag == '!' || charBeforeTag == '?' || charBeforeTag == 0 + isFollowedByNewlineOrEnd := charAfterTag == '\n' || charAfterTag == 0 + + // If the tag has proper formatting (newline before/after), it's likely real + if isPrecededByNewlineOrSentenceEnd && isFollowedByNewlineOrEnd { + log.Debugf("kiro: found properly formatted at pos %d", endIdx) + return endIdx + } + + // Check 4: Is the tag preceded by discussion keywords on the same line? 
+ lastNewlineIdx := strings.LastIndex(textBeforeEnd, "\n") + lineBeforeTag := textBeforeEnd + if lastNewlineIdx >= 0 { + lineBeforeTag = textBeforeEnd[lastNewlineIdx+1:] + } + lineBeforeTagLower := strings.ToLower(lineBeforeTag) + + // Discussion patterns - if found, this is likely discussion text + discussionPatterns := []string{ + "标签", "返回", "输出", "包含", "使用", "解析", "转换", "生成", // Chinese + "tag", "return", "output", "contain", "use", "parse", "emit", "convert", "generate", // English + "", // discussing both tags together + "``", // explicitly in inline code + } + isDiscussion := false + for _, pattern := range discussionPatterns { + if strings.Contains(lineBeforeTagLower, pattern) { + isDiscussion = true + break + } + } + if isDiscussion { + log.Debugf("kiro: found after discussion text at pos %d, skipping", endIdx) + searchStart = endIdx + len(kirocommon.ThinkingEndTag) + continue + } + + // Check 5: Is there text immediately after on the same line? + // Real end tags don't have text immediately after on the same line + if len(textAfterEnd) > 0 && charAfterTag != '\n' && charAfterTag != 0 { + // Find the next newline + nextNewline := strings.Index(textAfterEnd, "\n") + var textOnSameLine string + if nextNewline >= 0 { + textOnSameLine = textAfterEnd[:nextNewline] + } else { + textOnSameLine = textAfterEnd + } + // If there's non-whitespace text on the same line after the tag, it's discussion + if strings.TrimSpace(textOnSameLine) != "" { + log.Debugf("kiro: found with text after on same line at pos %d, skipping", endIdx) + searchStart = endIdx + len(kirocommon.ThinkingEndTag) + continue + } + } + + // Check 6: Is there another tag after this ? 
+ if strings.Contains(textAfterEnd, kirocommon.ThinkingStartTag) { + nextStartIdx := strings.Index(textAfterEnd, kirocommon.ThinkingStartTag) + textBeforeNextStart := textAfterEnd[:nextStartIdx] + nextBacktickCount := strings.Count(textBeforeNextStart, "`") + nextFenceCount := strings.Count(textBeforeNextStart, "```") + nextAltFenceCount := strings.Count(textBeforeNextStart, "~~~") + + // If the next is NOT in code, then this is discussion text + if nextBacktickCount%2 == 0 && nextFenceCount%2 == 0 && nextAltFenceCount%2 == 0 { + log.Debugf("kiro: found followed by at pos %d, likely discussion text, skipping", endIdx) + searchStart = endIdx + len(kirocommon.ThinkingEndTag) + continue + } + } + + // This looks like a real end tag + return endIdx + } +} + // determineAgenticMode determines if the model is an agentic or chat-only variant. // Returns (isAgentic, isChatOnly) based on model name suffixes. func determineAgenticMode(model string) (isAgentic, isChatOnly bool) { @@ -963,6 +1140,9 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki processedIDs := make(map[string]bool) var currentToolUse *kiroclaude.ToolUseState + // Upstream usage tracking - Kiro API returns credit usage and context percentage + var upstreamContextPercentage float64 // Context usage percentage from upstream (e.g., 78.56) + for { msg, eventErr := e.readEventStreamMessage(reader) if eventErr != nil { @@ -1119,6 +1299,119 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki stopReason = sr log.Debugf("kiro: parseEventStream found stopReason in messageStopEvent: %s", stopReason) } + + case "messageMetadataEvent": + // Handle message metadata events which may contain token counts + if metadata, ok := event["messageMetadataEvent"].(map[string]interface{}); ok { + if inputTokens, ok := metadata["inputTokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + log.Debugf("kiro: parseEventStream found inputTokens in 
messageMetadataEvent: %d", usageInfo.InputTokens) + } + if outputTokens, ok := metadata["outputTokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + log.Debugf("kiro: parseEventStream found outputTokens in messageMetadataEvent: %d", usageInfo.OutputTokens) + } + if totalTokens, ok := metadata["totalTokens"].(float64); ok { + usageInfo.TotalTokens = int64(totalTokens) + log.Debugf("kiro: parseEventStream found totalTokens in messageMetadataEvent: %d", usageInfo.TotalTokens) + } + } + + case "usageEvent", "usage": + // Handle dedicated usage events + if inputTokens, ok := event["inputTokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + log.Debugf("kiro: parseEventStream found inputTokens in usageEvent: %d", usageInfo.InputTokens) + } + if outputTokens, ok := event["outputTokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + log.Debugf("kiro: parseEventStream found outputTokens in usageEvent: %d", usageInfo.OutputTokens) + } + if totalTokens, ok := event["totalTokens"].(float64); ok { + usageInfo.TotalTokens = int64(totalTokens) + log.Debugf("kiro: parseEventStream found totalTokens in usageEvent: %d", usageInfo.TotalTokens) + } + // Also check nested usage object + if usageObj, ok := event["usage"].(map[string]interface{}); ok { + if inputTokens, ok := usageObj["input_tokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + } + if outputTokens, ok := usageObj["output_tokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + } + if totalTokens, ok := usageObj["total_tokens"].(float64); ok { + usageInfo.TotalTokens = int64(totalTokens) + } + log.Debugf("kiro: parseEventStream found usage object: input=%d, output=%d, total=%d", + 
usageInfo.InputTokens, usageInfo.OutputTokens, usageInfo.TotalTokens) + } + + case "metricsEvent": + // Handle metrics events which may contain usage data + if metrics, ok := event["metricsEvent"].(map[string]interface{}); ok { + if inputTokens, ok := metrics["inputTokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + } + if outputTokens, ok := metrics["outputTokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + } + log.Debugf("kiro: parseEventStream found metricsEvent: input=%d, output=%d", + usageInfo.InputTokens, usageInfo.OutputTokens) + } + + default: + // Check for contextUsagePercentage in any event + if ctxPct, ok := event["contextUsagePercentage"].(float64); ok { + upstreamContextPercentage = ctxPct + log.Debugf("kiro: parseEventStream received context usage: %.2f%%", upstreamContextPercentage) + } + // Log unknown event types for debugging (to discover new event formats) + log.Debugf("kiro: parseEventStream unknown event type: %s, payload: %s", eventType, string(payload)) + } + + // Check for direct token fields in any event (fallback) + if usageInfo.InputTokens == 0 { + if inputTokens, ok := event["inputTokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + log.Debugf("kiro: parseEventStream found direct inputTokens: %d", usageInfo.InputTokens) + } + } + if usageInfo.OutputTokens == 0 { + if outputTokens, ok := event["outputTokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + log.Debugf("kiro: parseEventStream found direct outputTokens: %d", usageInfo.OutputTokens) + } + } + + // Check for usage object in any event (OpenAI format) + if usageInfo.InputTokens == 0 || usageInfo.OutputTokens == 0 { + if usageObj, ok := event["usage"].(map[string]interface{}); ok { + if usageInfo.InputTokens == 0 { + if inputTokens, ok := usageObj["input_tokens"].(float64); ok { + usageInfo.InputTokens = int64(inputTokens) + } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok { + 
usageInfo.InputTokens = int64(inputTokens) + } + } + if usageInfo.OutputTokens == 0 { + if outputTokens, ok := usageObj["output_tokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok { + usageInfo.OutputTokens = int64(outputTokens) + } + } + if usageInfo.TotalTokens == 0 { + if totalTokens, ok := usageObj["total_tokens"].(float64); ok { + usageInfo.TotalTokens = int64(totalTokens) + } + } + log.Debugf("kiro: parseEventStream found usage object (fallback): input=%d, output=%d, total=%d", + usageInfo.InputTokens, usageInfo.OutputTokens, usageInfo.TotalTokens) + } } // Also check nested supplementaryWebLinksEvent @@ -1157,6 +1450,20 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki log.Warnf("kiro: response truncated due to max_tokens limit") } + // Use contextUsagePercentage to calculate more accurate input tokens + // Kiro model has 200k max context, contextUsagePercentage represents the percentage used + // Formula: input_tokens = contextUsagePercentage * 200000 / 100 + if upstreamContextPercentage > 0 { + calculatedInputTokens := int64(upstreamContextPercentage * 200000 / 100) + if calculatedInputTokens > 0 { + localEstimate := usageInfo.InputTokens + usageInfo.InputTokens = calculatedInputTokens + usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens + log.Infof("kiro: parseEventStream using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)", + upstreamContextPercentage, calculatedInputTokens, localEstimate) + } + } + return cleanedContent, toolUses, usageInfo, stopReason, nil } @@ -1357,7 +1664,8 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string { // Includes embedded [Called ...] tool call parsing and input buffering for toolUseEvent. // Implements duplicate content filtering using lastContentEvent detection (based on AIClient-2-API). 
// Extracts stop_reason from upstream events when available. -func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) { +// thinkingEnabled controls whether tags are parsed - only parse when request enabled thinking. +func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter, thinkingEnabled bool) { reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers var totalUsage usage.Detail var hasToolUses bool // Track if any tool uses were emitted @@ -1383,6 +1691,11 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out var lastUsageUpdateTime = time.Now() // Last time usage update was sent var lastReportedOutputTokens int64 // Last reported output token count + // Upstream usage tracking - Kiro API returns credit usage and context percentage + var upstreamCreditUsage float64 // Credit usage from upstream (e.g., 1.458) + var upstreamContextPercentage float64 // Context usage percentage from upstream (e.g., 78.56) + var hasUpstreamUsage bool // Whether we received usage from upstream + // Translator param for maintaining tool call state across streaming events // IMPORTANT: This must persist across all TranslateStream calls var translatorParam any @@ -1395,6 +1708,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out isThinkingBlockOpen := false // Track if thinking content block is open thinkingBlockIndex := -1 // Index of the thinking content block + // Code block state tracking for heuristic thinking tag parsing + // When inside a markdown code block, tags should NOT be parsed + // This prevents false positives when the model outputs code examples 
containing these tags + inCodeBlock := false + codeFenceType := "" // Track which fence type opened the block ("```" or "~~~") + + // Inline code state tracking - when inside backticks, don't parse thinking tags + // This handles cases like `` being discussed in text + inInlineCode := false + + // Track if we've seen any non-whitespace content before a thinking tag + // Real thinking blocks from Kiro always start at the very beginning of the response + // If we see content before , subsequent tags are likely discussion text + hasSeenNonThinkingContent := false + thinkingBlockCompleted := false // Track if we've already completed a thinking block + // Pre-calculate input tokens from request if possible // Kiro uses Claude format, so try Claude format first, then OpenAI format, then fallback if enc, err := getTokenizer(model); err == nil { @@ -1629,6 +1958,66 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out log.Debugf("kiro: streamToChannel found stopReason in messageStopEvent: %s", upstreamStopReason) } + default: + // Check for upstream usage events from Kiro API + // Format: {"unit":"credit","unitPlural":"credits","usage":1.458} + if unit, ok := event["unit"].(string); ok && unit == "credit" { + if usage, ok := event["usage"].(float64); ok { + upstreamCreditUsage = usage + hasUpstreamUsage = true + log.Debugf("kiro: received upstream credit usage: %.4f", upstreamCreditUsage) + } + } + // Format: {"contextUsagePercentage":78.56} + if ctxPct, ok := event["contextUsagePercentage"].(float64); ok { + upstreamContextPercentage = ctxPct + log.Debugf("kiro: received upstream context usage: %.2f%%", upstreamContextPercentage) + } + + // Check for token counts in unknown events + if inputTokens, ok := event["inputTokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + hasUpstreamUsage = true + log.Debugf("kiro: streamToChannel found inputTokens in event %s: %d", eventType, totalUsage.InputTokens) + } + if outputTokens, ok 
:= event["outputTokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + hasUpstreamUsage = true + log.Debugf("kiro: streamToChannel found outputTokens in event %s: %d", eventType, totalUsage.OutputTokens) + } + if totalTokens, ok := event["totalTokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + log.Debugf("kiro: streamToChannel found totalTokens in event %s: %d", eventType, totalUsage.TotalTokens) + } + + // Check for usage object in unknown events (OpenAI/Claude format) + if usageObj, ok := event["usage"].(map[string]interface{}); ok { + if inputTokens, ok := usageObj["input_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + hasUpstreamUsage = true + } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + hasUpstreamUsage = true + } + if outputTokens, ok := usageObj["output_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + hasUpstreamUsage = true + } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + hasUpstreamUsage = true + } + if totalTokens, ok := usageObj["total_tokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + } + log.Debugf("kiro: streamToChannel found usage object in event %s: input=%d, output=%d, total=%d", + eventType, totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens) + } + + // Log unknown event types for debugging (to discover new event formats) + if eventType != "" { + log.Debugf("kiro: streamToChannel unknown event type: %s, payload: %s", eventType, string(payload)) + } + case "assistantResponseEvent": var contentDelta string var toolUses []map[string]interface{} @@ -1742,9 +2131,243 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } for len(remaining) > 0 { + // CRITICAL FIX: Only parse tags when thinking mode was enabled in the request. 
+ // When thinking is NOT enabled, tags in responses should be treated as + // regular text content, not as thinking blocks. This prevents normal text content + // from being incorrectly parsed as thinking when the model outputs tags + // without the user requesting thinking mode. + if !thinkingEnabled { + // Thinking not enabled - emit all content as regular text without parsing tags + if remaining != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + + claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + break // Exit the for loop - all content processed as text + } + + // HEURISTIC FIX: Track code block and inline code state to avoid parsing tags + // inside code contexts. When the model outputs code examples containing these tags, + // they should be treated as text. 
+ if !inThinkBlock { + // Check for inline code backticks first (higher priority than code fences) + // This handles cases like `` being discussed in text + backtickIdx := strings.Index(remaining, kirocommon.InlineCodeMarker) + thinkingIdx := strings.Index(remaining, kirocommon.ThinkingStartTag) + + // If backtick comes before thinking tag, handle inline code + if backtickIdx >= 0 && (thinkingIdx < 0 || backtickIdx < thinkingIdx) { + if inInlineCode { + // Closing backtick - emit content up to and including backtick, exit inline code + textToEmit := remaining[:backtickIdx+1] + if textToEmit != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + remaining = remaining[backtickIdx+1:] + inInlineCode = false + continue + } else { + // Opening backtick - emit content before backtick, enter inline code + textToEmit := remaining[:backtickIdx+1] + if textToEmit != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for 
_, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + remaining = remaining[backtickIdx+1:] + inInlineCode = true + continue + } + } + + // If inside inline code, emit all content as text (don't parse thinking tags) + if inInlineCode { + if remaining != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + break // Exit loop - remaining content is inside inline code + } + + // Check for code fence markers (``` or ~~~) to toggle code block state + fenceIdx := strings.Index(remaining, kirocommon.CodeFenceMarker) + altFenceIdx := strings.Index(remaining, kirocommon.AltCodeFenceMarker) + + // Find the earliest fence marker + earliestFenceIdx := -1 + earliestFenceType := "" + if fenceIdx >= 0 && (altFenceIdx < 0 || fenceIdx < altFenceIdx) { + 
earliestFenceIdx = fenceIdx + earliestFenceType = kirocommon.CodeFenceMarker + } else if altFenceIdx >= 0 { + earliestFenceIdx = altFenceIdx + earliestFenceType = kirocommon.AltCodeFenceMarker + } + + if earliestFenceIdx >= 0 { + // Check if this fence comes before any thinking tag + thinkingIdx := strings.Index(remaining, kirocommon.ThinkingStartTag) + if inCodeBlock { + // Inside code block - check if this fence closes it + if earliestFenceType == codeFenceType { + // This fence closes the code block + // Emit content up to and including the fence as text + textToEmit := remaining[:earliestFenceIdx+len(earliestFenceType)] + if textToEmit != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + remaining = remaining[earliestFenceIdx+len(earliestFenceType):] + inCodeBlock = false + codeFenceType = "" + log.Debugf("kiro: exited code block") + continue + } + } else if thinkingIdx < 0 || earliestFenceIdx < thinkingIdx { + // Not in code block, and fence comes before thinking tag (or no thinking tag) + // Emit content up to and including the fence as text, then enter code block + textToEmit := remaining[:earliestFenceIdx+len(earliestFenceType)] + if textToEmit != "" { + if !isTextBlockOpen { + 
contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + remaining = remaining[earliestFenceIdx+len(earliestFenceType):] + inCodeBlock = true + codeFenceType = earliestFenceType + log.Debugf("kiro: entered code block with fence: %s", earliestFenceType) + continue + } + } + + // If inside code block, emit all content as text (don't parse thinking tags) + if inCodeBlock { + if remaining != "" { + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + break 
// Exit loop - all remaining content is inside code block + } + } + if inThinkBlock { // Inside thinking block - look for end tag - endIdx := strings.Index(remaining, kirocommon.ThinkingEndTag) + // CRITICAL FIX: Skip tags that are not the real end tag + // This prevents false positives when thinking content discusses these tags + // Pass current code block/inline code state for accurate detection + endIdx := findRealThinkingEndTag(remaining, inCodeBlock, inInlineCode) + if endIdx >= 0 { // Found end tag - emit any content before end tag, then close block thinkContent := remaining[:endIdx] @@ -1790,8 +2413,9 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } inThinkBlock = false + thinkingBlockCompleted = true // Mark that we've completed a thinking block remaining = remaining[endIdx+len(kirocommon.ThinkingEndTag):] - log.Debugf("kiro: exited thinking block") + log.Debugf("kiro: exited thinking block, subsequent tags will be treated as text") } else { // No end tag found - TRUE STREAMING: emit content immediately // Only save potential partial tag length for next iteration @@ -1837,11 +2461,29 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } else { // Outside thinking block - look for start tag - startIdx := strings.Index(remaining, kirocommon.ThinkingStartTag) + // CRITICAL FIX: Only parse tags at the very beginning of the response + // or if we haven't completed a thinking block yet. + // After a thinking block is completed, subsequent tags are likely + // discussion text (e.g., "Kiro returns `` tags") and should NOT be parsed. 
+ startIdx := -1 + if !thinkingBlockCompleted && !hasSeenNonThinkingContent { + startIdx = strings.Index(remaining, kirocommon.ThinkingStartTag) + // If there's non-whitespace content before the tag, it's not a real thinking block + if startIdx > 0 { + textBefore := remaining[:startIdx] + if strings.TrimSpace(textBefore) != "" { + // There's real content before the tag - this is discussion text, not thinking + hasSeenNonThinkingContent = true + startIdx = -1 + log.Debugf("kiro: found tag after non-whitespace content, treating as text") + } + } + } if startIdx >= 0 { // Found start tag - emit text before it and switch to thinking mode textBefore := remaining[:startIdx] if textBefore != "" { + // Only whitespace before thinking tag is allowed // Start text content block if needed if !isTextBlockOpen { contentBlockIndex++ @@ -1881,11 +2523,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out log.Debugf("kiro: entered thinking block") } else { // No start tag found - check for partial start tag at buffer end - pendingStart := kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingStartTag) + // Only check for partial tags if we haven't completed a thinking block yet + pendingStart := 0 + if !thinkingBlockCompleted && !hasSeenNonThinkingContent { + pendingStart = kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingStartTag) + } if pendingStart > 0 { // Emit text except potential partial tag textToEmit := remaining[:len(remaining)-pendingStart] if textToEmit != "" { + // Mark that we've seen non-thinking content + if strings.TrimSpace(textToEmit) != "" { + hasSeenNonThinkingContent = true + } // Start text content block if needed if !isTextBlockOpen { contentBlockIndex++ @@ -1912,6 +2562,10 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } else { // No partial tag - emit all as text if remaining != "" { + // Mark that we've seen non-thinking content + if strings.TrimSpace(remaining) != "" { + 
hasSeenNonThinkingContent = true + } // Start text content block if needed if !isTextBlockOpen { contentBlockIndex++ @@ -2065,6 +2719,69 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if outputTokens, ok := event["outputTokens"].(float64); ok { totalUsage.OutputTokens = int64(outputTokens) } + + case "messageMetadataEvent": + // Handle message metadata events which may contain token counts + if metadata, ok := event["messageMetadataEvent"].(map[string]interface{}); ok { + if inputTokens, ok := metadata["inputTokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + log.Debugf("kiro: streamToChannel found inputTokens in messageMetadataEvent: %d", totalUsage.InputTokens) + } + if outputTokens, ok := metadata["outputTokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + log.Debugf("kiro: streamToChannel found outputTokens in messageMetadataEvent: %d", totalUsage.OutputTokens) + } + if totalTokens, ok := metadata["totalTokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + log.Debugf("kiro: streamToChannel found totalTokens in messageMetadataEvent: %d", totalUsage.TotalTokens) + } + } + + case "usageEvent", "usage": + // Handle dedicated usage events + if inputTokens, ok := event["inputTokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + log.Debugf("kiro: streamToChannel found inputTokens in usageEvent: %d", totalUsage.InputTokens) + } + if outputTokens, ok := event["outputTokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + log.Debugf("kiro: streamToChannel found outputTokens in usageEvent: %d", totalUsage.OutputTokens) + } + if totalTokens, ok := event["totalTokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + log.Debugf("kiro: streamToChannel found totalTokens in usageEvent: %d", totalUsage.TotalTokens) + } + // Also check nested usage object + if usageObj, ok := event["usage"].(map[string]interface{}); ok { + if 
inputTokens, ok := usageObj["input_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + } + if outputTokens, ok := usageObj["output_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + } + if totalTokens, ok := usageObj["total_tokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + } + log.Debugf("kiro: streamToChannel found usage object: input=%d, output=%d, total=%d", + totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens) + } + + case "metricsEvent": + // Handle metrics events which may contain usage data + if metrics, ok := event["metricsEvent"].(map[string]interface{}); ok { + if inputTokens, ok := metrics["inputTokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + } + if outputTokens, ok := metrics["outputTokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + } + log.Debugf("kiro: streamToChannel found metricsEvent: input=%d, output=%d", + totalUsage.InputTokens, totalUsage.OutputTokens) + } } // Check nested usage event @@ -2076,6 +2793,47 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out totalUsage.OutputTokens = int64(outputTokens) } } + + // Check for direct token fields in any event (fallback) + if totalUsage.InputTokens == 0 { + if inputTokens, ok := event["inputTokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + log.Debugf("kiro: streamToChannel found direct inputTokens: %d", totalUsage.InputTokens) + } + } + if totalUsage.OutputTokens == 0 { + if outputTokens, ok := event["outputTokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + log.Debugf("kiro: streamToChannel found direct outputTokens: %d", totalUsage.OutputTokens) + 
} + } + + // Check for usage object in any event (OpenAI format) + if totalUsage.InputTokens == 0 || totalUsage.OutputTokens == 0 { + if usageObj, ok := event["usage"].(map[string]interface{}); ok { + if totalUsage.InputTokens == 0 { + if inputTokens, ok := usageObj["input_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + } else if inputTokens, ok := usageObj["prompt_tokens"].(float64); ok { + totalUsage.InputTokens = int64(inputTokens) + } + } + if totalUsage.OutputTokens == 0 { + if outputTokens, ok := usageObj["output_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + } else if outputTokens, ok := usageObj["completion_tokens"].(float64); ok { + totalUsage.OutputTokens = int64(outputTokens) + } + } + if totalUsage.TotalTokens == 0 { + if totalTokens, ok := usageObj["total_tokens"].(float64); ok { + totalUsage.TotalTokens = int64(totalTokens) + } + } + log.Debugf("kiro: streamToChannel found usage object (fallback): input=%d, output=%d, total=%d", + totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens) + } + } } // Close content block if open @@ -2120,8 +2878,35 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out totalUsage.OutputTokens = 1 } } + + // Use contextUsagePercentage to calculate more accurate input tokens + // Kiro model has 200k max context, contextUsagePercentage represents the percentage used + // Formula: input_tokens = contextUsagePercentage * 200000 / 100 + // Note: The effective input context is ~170k (200k - 30k reserved for output) + if upstreamContextPercentage > 0 { + // Calculate input tokens from context percentage + // Using 200k as the base since that's what Kiro reports against + calculatedInputTokens := int64(upstreamContextPercentage * 200000 / 100) + + // Only use calculated value if it's significantly different from local estimate + // This provides more accurate token counts based on upstream data + if calculatedInputTokens > 0 { + 
localEstimate := totalUsage.InputTokens + totalUsage.InputTokens = calculatedInputTokens + log.Infof("kiro: using contextUsagePercentage (%.2f%%) to calculate input tokens: %d (local estimate was: %d)", + upstreamContextPercentage, calculatedInputTokens, localEstimate) + } + } + totalUsage.TotalTokens = totalUsage.InputTokens + totalUsage.OutputTokens + // Log upstream usage information if received + if hasUpstreamUsage { + log.Infof("kiro: upstream usage - credits: %.4f, context: %.2f%%, final tokens - input: %d, output: %d, total: %d", + upstreamCreditUsage, upstreamContextPercentage, + totalUsage.InputTokens, totalUsage.OutputTokens, totalUsage.TotalTokens) + } + // Determine stop reason: prefer upstream, then detect tool_use, default to end_turn stopReason := upstreamStopReason if stopReason == "" { @@ -2162,10 +2947,48 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // NOTE: Claude SSE event builders moved to internal/translator/kiro/claude/kiro_claude_stream.go // The executor now uses kiroclaude.BuildClaude*Event() functions instead -// CountTokens is not supported for Kiro provider. -// Kiro/Amazon Q backend doesn't expose a token counting API. -func (e *KiroExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for kiro"} +// CountTokens counts tokens locally using tiktoken since Kiro API doesn't expose a token counting endpoint. +// This provides approximate token counts for client requests. 
+func (e *KiroExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + // Use tiktoken for local token counting + enc, err := getTokenizer(req.Model) + if err != nil { + log.Warnf("kiro: CountTokens failed to get tokenizer: %v, falling back to estimate", err) + // Fallback: estimate from payload size (roughly 4 chars per token) + estimatedTokens := len(req.Payload) / 4 + if estimatedTokens == 0 && len(req.Payload) > 0 { + estimatedTokens = 1 + } + return cliproxyexecutor.Response{ + Payload: []byte(fmt.Sprintf(`{"count":%d}`, estimatedTokens)), + }, nil + } + + // Try to count tokens from the request payload + var totalTokens int64 + + // Try OpenAI chat format first + if tokens, countErr := countOpenAIChatTokens(enc, req.Payload); countErr == nil && tokens > 0 { + totalTokens = tokens + log.Debugf("kiro: CountTokens counted %d tokens using OpenAI chat format", totalTokens) + } else { + // Fallback: count raw payload tokens + if tokenCount, countErr := enc.Count(string(req.Payload)); countErr == nil { + totalTokens = int64(tokenCount) + log.Debugf("kiro: CountTokens counted %d tokens from raw payload", totalTokens) + } else { + // Final fallback: estimate from payload size + totalTokens = int64(len(req.Payload) / 4) + if totalTokens == 0 && len(req.Payload) > 0 { + totalTokens = 1 + } + log.Debugf("kiro: CountTokens estimated %d tokens from payload size", totalTokens) + } + } + + return cliproxyexecutor.Response{ + Payload: []byte(fmt.Sprintf(`{"count":%d}`, totalTokens)), + }, nil } // Refresh refreshes the Kiro OAuth token. 
diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go index 07472be4..ae42b186 100644 --- a/internal/translator/kiro/claude/kiro_claude_request.go +++ b/internal/translator/kiro/claude/kiro_claude_request.go @@ -30,6 +30,7 @@ type KiroPayload struct { type KiroInferenceConfig struct { MaxTokens int `json:"maxTokens,omitempty"` Temperature float64 `json:"temperature,omitempty"` + TopP float64 `json:"topP,omitempty"` } // KiroConversationState holds the conversation context @@ -136,9 +137,15 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo // Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint. func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte { // Extract max_tokens for potential use in inferenceConfig + // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens) + const kiroMaxOutputTokens = 32000 var maxTokens int64 if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() { maxTokens = mt.Int() + if maxTokens == -1 { + maxTokens = kiroMaxOutputTokens + log.Debugf("kiro: max_tokens=-1 converted to %d", kiroMaxOutputTokens) + } } // Extract temperature if specified @@ -149,6 +156,15 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA hasTemperature = true } + // Extract top_p if specified + var topP float64 + var hasTopP bool + if tp := gjson.GetBytes(claudeBody, "top_p"); tp.Exists() { + topP = tp.Float() + hasTopP = true + log.Debugf("kiro: extracted top_p: %.2f", topP) + } + // Normalize origin value for Kiro API compatibility origin = normalizeOrigin(origin) log.Debugf("kiro: normalized origin value: %s", origin) @@ -164,8 +180,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA // Extract system prompt systemPrompt := extractSystemPrompt(claudeBody) - // Check for thinking mode - 
thinkingEnabled, budgetTokens := checkThinkingMode(claudeBody) + // Check for thinking mode using the comprehensive IsThinkingEnabled function + // This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format + thinkingEnabled := IsThinkingEnabled(claudeBody) + _, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available + if budgetTokens <= 0 { + // Calculate budgetTokens based on max_tokens if available + // Use 50% of max_tokens for thinking, with min 8000 and max 24000 + if maxTokens > 0 { + budgetTokens = maxTokens / 2 + if budgetTokens < 8000 { + budgetTokens = 8000 + } + if budgetTokens > 24000 { + budgetTokens = 24000 + } + log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens) + } else { + budgetTokens = 16000 // Default budget tokens + } + } // Inject timestamp context timestamp := time.Now().Format("2006-01-02 15:04:05 MST") @@ -185,6 +219,17 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA systemPrompt += kirocommon.KiroAgenticSystemPrompt } + // Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints + // Claude tool_choice values: {"type": "auto/any/tool", "name": "..."} + toolChoiceHint := extractClaudeToolChoiceHint(claudeBody) + if toolChoiceHint != "" { + if systemPrompt != "" { + systemPrompt += "\n" + } + systemPrompt += toolChoiceHint + log.Debugf("kiro: injected tool_choice hint into system prompt") + } + // Inject thinking hint when thinking mode is enabled if thinkingEnabled { if systemPrompt != "" { @@ -235,7 +280,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA // Build inferenceConfig if we have any inference parameters var inferenceConfig *KiroInferenceConfig - if maxTokens > 0 || hasTemperature { + if maxTokens > 0 || hasTemperature || hasTopP { inferenceConfig = &KiroInferenceConfig{} if maxTokens > 0 { 
inferenceConfig.MaxTokens = int(maxTokens) @@ -243,6 +288,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA if hasTemperature { inferenceConfig.Temperature = temperature } + if hasTopP { + inferenceConfig.TopP = topP + } } payload := KiroPayload{ @@ -324,6 +372,93 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) { return thinkingEnabled, budgetTokens } +// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled. +// This is used by the executor to determine whether to parse tags in responses. +// When thinking is NOT enabled in the request, tags in responses should be +// treated as regular text content, not as thinking blocks. +// +// Supports multiple formats: +// - Claude API format: thinking.type = "enabled" +// - OpenAI format: reasoning_effort parameter +// - AMP/Cursor format: interleaved in system prompt +func IsThinkingEnabled(body []byte) bool { + // Check Claude API format first (thinking.type = "enabled") + enabled, _ := checkThinkingMode(body) + if enabled { + log.Debugf("kiro: IsThinkingEnabled returning true (Claude API format)") + return true + } + + // Check OpenAI format: reasoning_effort parameter + // Valid values: "low", "medium", "high", "auto" (not "none") + reasoningEffort := gjson.GetBytes(body, "reasoning_effort") + if reasoningEffort.Exists() { + effort := reasoningEffort.String() + if effort != "" && effort != "none" { + log.Debugf("kiro: thinking mode enabled via OpenAI reasoning_effort: %s", effort) + return true + } + } + + // Check AMP/Cursor format: interleaved in system prompt + // This is how AMP client passes thinking configuration + bodyStr := string(body) + if strings.Contains(bodyStr, "") && strings.Contains(bodyStr, "") { + // Extract thinking mode value + startTag := "" + endTag := "" + startIdx := strings.Index(bodyStr, startTag) + if startIdx >= 0 { + startIdx += len(startTag) + endIdx := strings.Index(bodyStr[startIdx:], endTag) + if endIdx >= 0 { + 
thinkingMode := bodyStr[startIdx : startIdx+endIdx] + if thinkingMode == "interleaved" || thinkingMode == "enabled" { + log.Debugf("kiro: thinking mode enabled via AMP/Cursor format: %s", thinkingMode) + return true + } + } + } + } + + // Check OpenAI format: max_completion_tokens with reasoning (o1-style) + // Some clients use this to indicate reasoning mode + if gjson.GetBytes(body, "max_completion_tokens").Exists() { + // If max_completion_tokens is set, check if model name suggests reasoning + model := gjson.GetBytes(body, "model").String() + if strings.Contains(strings.ToLower(model), "thinking") || + strings.Contains(strings.ToLower(model), "reason") { + log.Debugf("kiro: thinking mode enabled via model name hint: %s", model) + return true + } + } + + log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)") + return false +} + +// shortenToolNameIfNeeded shortens tool names that exceed 64 characters. +// MCP tools often have long names like "mcp__server-name__tool-name". +// This preserves the "mcp__" prefix and last segment when possible. 
+func shortenToolNameIfNeeded(name string) string { + const limit = 64 + if len(name) <= limit { + return name + } + // For MCP tools, try to preserve prefix and last segment + if strings.HasPrefix(name, "mcp__") { + idx := strings.LastIndex(name, "__") + if idx > 0 { + cand := "mcp__" + name[idx+2:] + if len(cand) > limit { + return cand[:limit] + } + return cand + } + } + return name[:limit] +} + // convertClaudeToolsToKiro converts Claude tools to Kiro format func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper { var kiroTools []KiroToolWrapper @@ -336,6 +471,13 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper { description := tool.Get("description").String() inputSchema := tool.Get("input_schema").Value() + // Shorten tool name if it exceeds 64 characters (common with MCP tools) + originalName := name + name = shortenToolNameIfNeeded(name) + if name != originalName { + log.Debugf("kiro: shortened tool name from '%s' to '%s'", originalName, name) + } + // CRITICAL FIX: Kiro API requires non-empty description if strings.TrimSpace(description) == "" { description = fmt.Sprintf("Tool: %s", name) @@ -467,6 +609,34 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult { return unique } +// extractClaudeToolChoiceHint extracts tool_choice from Claude request and returns a system prompt hint. +// Claude tool_choice values: +// - {"type": "auto"}: Model decides (default, no hint needed) +// - {"type": "any"}: Must use at least one tool +// - {"type": "tool", "name": "..."}: Must use specific tool +func extractClaudeToolChoiceHint(claudeBody []byte) string { + toolChoice := gjson.GetBytes(claudeBody, "tool_choice") + if !toolChoice.Exists() { + return "" + } + + toolChoiceType := toolChoice.Get("type").String() + switch toolChoiceType { + case "any": + return "[INSTRUCTION: You MUST use at least one of the available tools to respond. 
Do not respond with text only - always make a tool call.]" + case "tool": + toolName := toolChoice.Get("name").String() + if toolName != "" { + return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName) + } + case "auto": + // Default behavior, no hint needed + return "" + } + + return "" +} + // BuildUserMessageStruct builds a user message and extracts tool results func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) { content := msg.Get("content") diff --git a/internal/translator/kiro/common/constants.go b/internal/translator/kiro/common/constants.go index 1d4b0330..96174b8c 100644 --- a/internal/translator/kiro/common/constants.go +++ b/internal/translator/kiro/common/constants.go @@ -12,6 +12,15 @@ const ( // ThinkingEndTag is the end tag for thinking blocks in responses. ThinkingEndTag = "" + // CodeFenceMarker is the markdown code fence marker. + CodeFenceMarker = "```" + + // AltCodeFenceMarker is the alternative markdown code fence marker. + AltCodeFenceMarker = "~~~" + + // InlineCodeMarker is the markdown inline code marker (backtick). + InlineCodeMarker = "`" + // KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes. // AWS Kiro API has a 2-3 minute timeout for large file write operations. 
KiroAgenticSystemPrompt = ` diff --git a/internal/translator/kiro/openai/kiro_openai.go b/internal/translator/kiro/openai/kiro_openai.go index 35cd0424..d5822998 100644 --- a/internal/translator/kiro/openai/kiro_openai.go +++ b/internal/translator/kiro/openai/kiro_openai.go @@ -156,8 +156,9 @@ func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalReques } case "message_stop": - // Final event - emit [DONE] - results = append(results, BuildOpenAISSEDone()) + // Final event - do NOT emit [DONE] here + // The handler layer (openai_handlers.go) will send [DONE] when the stream closes + // Emitting [DONE] here would cause duplicate [DONE] markers case "ping": // Ping event with usage - optionally emit usage chunk diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go index 4aaa8b4e..21b15aa0 100644 --- a/internal/translator/kiro/openai/kiro_openai_request.go +++ b/internal/translator/kiro/openai/kiro_openai_request.go @@ -29,6 +29,7 @@ type KiroPayload struct { type KiroInferenceConfig struct { MaxTokens int `json:"maxTokens,omitempty"` Temperature float64 `json:"temperature,omitempty"` + TopP float64 `json:"topP,omitempty"` } // KiroConversationState holds the conversation context @@ -134,9 +135,15 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo // isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode). 
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte { // Extract max_tokens for potential use in inferenceConfig + // Handle -1 as "use maximum" (Kiro max output is ~32000 tokens) + const kiroMaxOutputTokens = 32000 var maxTokens int64 if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() { maxTokens = mt.Int() + if maxTokens == -1 { + maxTokens = kiroMaxOutputTokens + log.Debugf("kiro-openai: max_tokens=-1 converted to %d", kiroMaxOutputTokens) + } } // Extract temperature if specified @@ -147,6 +154,15 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s hasTemperature = true } + // Extract top_p if specified + var topP float64 + var hasTopP bool + if tp := gjson.GetBytes(openaiBody, "top_p"); tp.Exists() { + topP = tp.Float() + hasTopP = true + log.Debugf("kiro-openai: extracted top_p: %.2f", topP) + } + // Normalize origin value for Kiro API compatibility origin = normalizeOrigin(origin) log.Debugf("kiro-openai: normalized origin value: %s", origin) @@ -180,6 +196,54 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s systemPrompt += kirocommon.KiroAgenticSystemPrompt } + // Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints + // OpenAI tool_choice values: "none", "auto", "required", or {"type":"function","function":{"name":"..."}} + toolChoiceHint := extractToolChoiceHint(openaiBody) + if toolChoiceHint != "" { + if systemPrompt != "" { + systemPrompt += "\n" + } + systemPrompt += toolChoiceHint + log.Debugf("kiro-openai: injected tool_choice hint into system prompt") + } + + // Handle response_format parameter - Kiro doesn't support it natively, so we inject system prompt hints + // OpenAI response_format: {"type": "json_object"} or {"type": "json_schema", "json_schema": {...}} + responseFormatHint := extractResponseFormatHint(openaiBody) + if responseFormatHint != "" { + 
if systemPrompt != "" { + systemPrompt += "\n" + } + systemPrompt += responseFormatHint + log.Debugf("kiro-openai: injected response_format hint into system prompt") + } + + // Check for thinking mode and inject thinking hint + // Supports OpenAI reasoning_effort parameter and model name hints + thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody) + if thinkingEnabled { + // Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort + // Use 50% of max_tokens for thinking, with min 8000 and max 24000 + if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set + calculatedBudget := maxTokens / 2 + if calculatedBudget < 8000 { + calculatedBudget = 8000 + } + if calculatedBudget > 24000 { + calculatedBudget = 24000 + } + budgetTokens = calculatedBudget + log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens) + } + + if systemPrompt != "" { + systemPrompt += "\n" + } + dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens) + systemPrompt += dynamicThinkingHint + log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens) + } + // Convert OpenAI tools to Kiro format kiroTools := convertOpenAIToolsToKiro(tools) @@ -220,7 +284,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s // Build inferenceConfig if we have any inference parameters var inferenceConfig *KiroInferenceConfig - if maxTokens > 0 || hasTemperature { + if maxTokens > 0 || hasTemperature || hasTopP { inferenceConfig = &KiroInferenceConfig{} if maxTokens > 0 { inferenceConfig.MaxTokens = int(maxTokens) @@ -228,6 +292,9 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s if hasTemperature { inferenceConfig.Temperature = temperature } + if hasTopP { + inferenceConfig.TopP = topP + } } payload := KiroPayload{ @@ -292,6 +359,28 @@ func 
extractSystemPromptFromOpenAI(messages gjson.Result) string { return strings.Join(systemParts, "\n") } +// shortenToolNameIfNeeded shortens tool names that exceed 64 characters. +// MCP tools often have long names like "mcp__server-name__tool-name". +// This preserves the "mcp__" prefix and last segment when possible. +func shortenToolNameIfNeeded(name string) string { + const limit = 64 + if len(name) <= limit { + return name + } + // For MCP tools, try to preserve prefix and last segment + if strings.HasPrefix(name, "mcp__") { + idx := strings.LastIndex(name, "__") + if idx > 0 { + cand := "mcp__" + name[idx+2:] + if len(cand) > limit { + return cand[:limit] + } + return cand + } + } + return name[:limit] +} + // convertOpenAIToolsToKiro converts OpenAI tools to Kiro format func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper { var kiroTools []KiroToolWrapper @@ -314,6 +403,13 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper { description := fn.Get("description").String() parameters := fn.Get("parameters").Value() + // Shorten tool name if it exceeds 64 characters (common with MCP tools) + originalName := name + name = shortenToolNameIfNeeded(name) + if name != originalName { + log.Debugf("kiro-openai: shortened tool name from '%s' to '%s'", originalName, name) + } + // CRITICAL FIX: Kiro API requires non-empty description if strings.TrimSpace(description) == "" { description = fmt.Sprintf("Tool: %s", name) @@ -584,6 +680,153 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul return finalContent } +// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request. +// Returns (thinkingEnabled, budgetTokens). 
+// Supports:
+// - reasoning_effort parameter (low/medium/high/auto)
+// - Model name containing "thinking" or "reason"
+// - <thinking_mode> tag in system prompt (AMP/Cursor format)
+func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
+	var budgetTokens int64 = 16000 // Default budget
+
+	// Check OpenAI format: reasoning_effort parameter
+	// Valid values: "low", "medium", "high", "auto" (not "none")
+	reasoningEffort := gjson.GetBytes(openaiBody, "reasoning_effort")
+	if reasoningEffort.Exists() {
+		effort := reasoningEffort.String()
+		if effort != "" && effort != "none" {
+			log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
+			// Adjust budget based on effort level
+			switch effort {
+			case "low":
+				budgetTokens = 8000
+			case "medium":
+				budgetTokens = 16000
+			case "high":
+				budgetTokens = 32000
+			case "auto":
+				budgetTokens = 16000
+			}
+			return true, budgetTokens
+		}
+	}
+
+	// Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
+	bodyStr := string(openaiBody)
+	if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
+		startTag := "<thinking_mode>"
+		endTag := "</thinking_mode>"
+		startIdx := strings.Index(bodyStr, startTag)
+		if startIdx >= 0 {
+			startIdx += len(startTag)
+			endIdx := strings.Index(bodyStr[startIdx:], endTag)
+			if endIdx >= 0 {
+				thinkingMode := bodyStr[startIdx : startIdx+endIdx]
+				if thinkingMode == "interleaved" || thinkingMode == "enabled" {
+					log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
+					// Try to extract max_thinking_length if present
+					if maxLenStart := strings.Index(bodyStr, "<max_thinking_length>"); maxLenStart >= 0 {
+						maxLenStart += len("<max_thinking_length>")
+						if maxLenEnd := strings.Index(bodyStr[maxLenStart:], "</max_thinking_length>"); maxLenEnd >= 0 {
+							maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
+							if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
+								log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
+							}
+						}
+					}
+					return true, budgetTokens
+				}
+			}
+		}
+	}
+
+	// Check model name for thinking hints
+	model := gjson.GetBytes(openaiBody, "model").String()
+	modelLower := strings.ToLower(model)
+	if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
+		log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
+		return true, budgetTokens
+	}
+
+	log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
+	return false, budgetTokens
+}
+
+// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
+// OpenAI tool_choice values:
+// - "none": Don't use any tools
+// - "auto": Model decides (default, no hint needed)
+// - "required": Must use at least one tool
+// - {"type":"function","function":{"name":"..."}} : Must use specific tool
+func extractToolChoiceHint(openaiBody []byte) string {
+	toolChoice := gjson.GetBytes(openaiBody, "tool_choice")
+	if !toolChoice.Exists() {
+		return ""
+	}
+
+	// Handle string values
+	if toolChoice.Type == gjson.String {
+		switch toolChoice.String() {
+		case "none":
+			// Note: When tool_choice is "none", we should ideally not pass tools at all
+			// But since we can't modify tool passing here, we add a strong hint
+			return "[INSTRUCTION: Do NOT use any tools. Respond with text only.]"
+		case "required":
+			return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
+		case "auto":
+			// Default behavior, no hint needed
+			return ""
+		}
+	}
+
+	// Handle object value: {"type":"function","function":{"name":"..."}}
+	if toolChoice.IsObject() {
+		if toolChoice.Get("type").String() == "function" {
+			toolName := toolChoice.Get("function.name").String()
+			if toolName != "" {
+				return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
+			}
+		}
+	}
+
+	return ""
+}
+
+// extractResponseFormatHint extracts response_format from OpenAI request and returns a system prompt hint.
+// OpenAI response_format values:
+// - {"type": "text"}: Default, no hint needed
+// - {"type": "json_object"}: Must respond with valid JSON
+// - {"type": "json_schema", "json_schema": {...}}: Must respond with JSON matching schema
+func extractResponseFormatHint(openaiBody []byte) string {
+	responseFormat := gjson.GetBytes(openaiBody, "response_format")
+	if !responseFormat.Exists() {
+		return ""
+	}
+
+	formatType := responseFormat.Get("type").String()
+	switch formatType {
+	case "json_object":
+		return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
+	case "json_schema":
+		// Extract schema if provided
+		schema := responseFormat.Get("json_schema.schema")
+		if schema.Exists() {
+			schemaStr := schema.Raw
+			// Truncate if too long
+			if len(schemaStr) > 500 {
+				schemaStr = schemaStr[:500] + "..."
+			}
+			return fmt.Sprintf("[INSTRUCTION: You MUST respond with valid JSON that matches this schema: %s. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]", schemaStr)
+		}
+		return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
+	case "text":
+		// Default behavior, no hint needed
+		return ""
+	}
+
+	return ""
+}
+
 // deduplicateToolResults removes duplicate tool results
 func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
 	if len(toolResults) == 0 {
diff --git a/internal/translator/kiro/openai/kiro_openai_stream.go b/internal/translator/kiro/openai/kiro_openai_stream.go
index d550a8d8..e72d970e 100644
--- a/internal/translator/kiro/openai/kiro_openai_stream.go
+++ b/internal/translator/kiro/openai/kiro_openai_stream.go
@@ -5,7 +5,6 @@ package openai
 
 import (
 	"encoding/json"
-	"fmt"
 	"time"
 
 	"github.com/google/uuid"
@@ -34,9 +33,12 @@ func NewOpenAIStreamState(model string) *OpenAIStreamState {
 	}
 }
 
-// FormatSSEEvent formats a JSON payload as an SSE event
+// FormatSSEEvent formats a JSON payload for SSE streaming.
+// Note: This returns raw JSON data without "data:" prefix.
+// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
+// to maintain architectural consistency and avoid double-prefix issues.
 func FormatSSEEvent(data []byte) string {
-	return fmt.Sprintf("data: %s", string(data))
+	return string(data)
 }
 
 // BuildOpenAISSETextDelta creates an SSE event for text content delta
@@ -130,9 +132,12 @@ func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) strin
 	return FormatSSEEvent(result)
 }
 
-// BuildOpenAISSEDone creates the final [DONE] SSE event
+// BuildOpenAISSEDone creates the final [DONE] SSE event.
+// Note: This returns raw "[DONE]" without "data:" prefix.
+// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
+// to maintain architectural consistency and avoid double-prefix issues.
 func BuildOpenAISSEDone() string {
-	return "data: [DONE]"
+	return "[DONE]"
 }
 
 // buildBaseChunk creates a base chunk structure for streaming