feat(kiro): enhance request translation and fix streaming issues

English:
- Fix <thinking> tag parsing: only parse at response start, avoid misinterpreting discussion text
- Add token counting support using tiktoken for local estimation
- Support top_p parameter in inference config
- Handle max_tokens=-1 as maximum (32000 tokens)
- Add tool_choice and response_format parameter handling via system prompt hints
- Support multiple thinking mode detection formats (Claude API, OpenAI reasoning_effort, AMP/Cursor)
- Shorten MCP tool names exceeding 64 characters
- Fix duplicate [DONE] marker in OpenAI SSE streaming
- Enhance token usage statistics with multiple event format support
- Add code fence markers to constants

中文:
- 修复 <thinking> 标签解析:仅在响应开头解析,避免误解析讨论文本中的标签
- 使用 tiktoken 实现本地 token 计数功能
- 支持 top_p 推理配置参数
- 处理 max_tokens=-1 转换为最大值(32000 tokens)
- 通过系统提示词注入实现 tool_choice 和 response_format 参数支持
- 支持多种思考模式检测格式(Claude API、OpenAI reasoning_effort、AMP/Cursor)
- 截断超过64字符的 MCP 工具名称
- 修复 OpenAI SSE 流中重复的 [DONE] 标记
- 增强 token 使用量统计,支持多种事件格式
- 添加代码围栏标记常量
This commit is contained in:
Ravens2121
2025-12-14 11:54:57 +08:00
parent 81ae09d0ec
commit 9c04c18c04
6 changed files with 1278 additions and 27 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,7 @@ type KiroPayload struct {
type KiroInferenceConfig struct {
// MaxTokens is serialized as "maxTokens"; with omitempty it is left out of the JSON when it is 0.
MaxTokens int `json:"maxTokens,omitempty"`
// Temperature is serialized as "temperature"; with omitempty it is left out of the JSON when it is 0.
// NOTE(review): this means an explicitly requested temperature of 0 is never transmitted — confirm that is intended.
Temperature float64 `json:"temperature,omitempty"`
// TopP is serialized as "topP"; with omitempty it is left out of the JSON when it is 0.
// NOTE(review): an explicit top_p of 0 is dropped the same way — confirm that is intended.
TopP float64 `json:"topP,omitempty"`
}
// KiroConversationState holds the conversation context
@@ -136,9 +137,15 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
var maxTokens int64
if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() {
maxTokens = mt.Int()
if maxTokens == -1 {
maxTokens = kiroMaxOutputTokens
log.Debugf("kiro: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
}
}
// Extract temperature if specified
@@ -149,6 +156,15 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
hasTemperature = true
}
// Extract top_p if specified
var topP float64
var hasTopP bool
if tp := gjson.GetBytes(claudeBody, "top_p"); tp.Exists() {
topP = tp.Float()
hasTopP = true
log.Debugf("kiro: extracted top_p: %.2f", topP)
}
// Normalize origin value for Kiro API compatibility
origin = normalizeOrigin(origin)
log.Debugf("kiro: normalized origin value: %s", origin)
@@ -164,8 +180,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Extract system prompt
systemPrompt := extractSystemPrompt(claudeBody)
// Check for thinking mode
thinkingEnabled, budgetTokens := checkThinkingMode(claudeBody)
// Check for thinking mode using the comprehensive IsThinkingEnabled function
// This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
thinkingEnabled := IsThinkingEnabled(claudeBody)
_, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
if budgetTokens <= 0 {
// Calculate budgetTokens based on max_tokens if available
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
if maxTokens > 0 {
budgetTokens = maxTokens / 2
if budgetTokens < 8000 {
budgetTokens = 8000
}
if budgetTokens > 24000 {
budgetTokens = 24000
}
log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
} else {
budgetTokens = 16000 // Default budget tokens
}
}
// Inject timestamp context
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
@@ -185,6 +219,17 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
systemPrompt += kirocommon.KiroAgenticSystemPrompt
}
// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
// Claude tool_choice values: {"type": "auto/any/tool", "name": "..."}
toolChoiceHint := extractClaudeToolChoiceHint(claudeBody)
if toolChoiceHint != "" {
if systemPrompt != "" {
systemPrompt += "\n"
}
systemPrompt += toolChoiceHint
log.Debugf("kiro: injected tool_choice hint into system prompt")
}
// Inject thinking hint when thinking mode is enabled
if thinkingEnabled {
if systemPrompt != "" {
@@ -235,7 +280,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
// Build inferenceConfig if we have any inference parameters
var inferenceConfig *KiroInferenceConfig
if maxTokens > 0 || hasTemperature {
if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
if maxTokens > 0 {
inferenceConfig.MaxTokens = int(maxTokens)
@@ -243,6 +288,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
if hasTemperature {
inferenceConfig.Temperature = temperature
}
if hasTopP {
inferenceConfig.TopP = topP
}
}
payload := KiroPayload{
@@ -324,6 +372,93 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
return thinkingEnabled, budgetTokens
}
// IsThinkingEnabled reports whether the request body asks for thinking mode.
// Callers can use the result to decide whether <thinking> tags in a response
// should be parsed as thinking blocks or left as ordinary text.
//
// The checks run in this order and the first match wins:
//   - Claude API format, as decided by checkThinkingMode
//   - OpenAI format: a non-empty reasoning_effort other than "none"
//   - AMP/Cursor format: the first <thinking_mode>...</thinking_mode> tag in the
//     raw body holds "interleaved" or "enabled"
//   - max_completion_tokens is present and the model name contains "thinking"
//     or "reason" (case-insensitive)
func IsThinkingEnabled(body []byte) bool {
	// Claude API format takes precedence over every other signal.
	if claudeEnabled, _ := checkThinkingMode(body); claudeEnabled {
		log.Debugf("kiro: IsThinkingEnabled returning true (Claude API format)")
		return true
	}

	// OpenAI format: any reasoning_effort except "" and "none" turns thinking on.
	if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() {
		switch level := re.String(); level {
		case "", "none":
			// Explicitly disabled or empty: keep looking at the other formats.
		default:
			log.Debugf("kiro: thinking mode enabled via OpenAI reasoning_effort: %s", level)
			return true
		}
	}

	// AMP/Cursor format: the tag is searched in the raw request text, so it is
	// found wherever it appears in the body. Only the first opening tag is inspected.
	const openTag, closeTag = "<thinking_mode>", "</thinking_mode>"
	raw := string(body)
	if open := strings.Index(raw, openTag); open >= 0 {
		rest := raw[open+len(openTag):]
		if end := strings.Index(rest, closeTag); end >= 0 {
			switch mode := rest[:end]; mode {
			case "interleaved", "enabled":
				log.Debugf("kiro: thinking mode enabled via AMP/Cursor format: %s", mode)
				return true
			}
		}
	}

	// o1-style requests: max_completion_tokens combined with a model name that
	// hints at reasoning.
	if gjson.GetBytes(body, "max_completion_tokens").Exists() {
		model := gjson.GetBytes(body, "model").String()
		lowered := strings.ToLower(model)
		if strings.Contains(lowered, "thinking") || strings.Contains(lowered, "reason") {
			log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
			return true
		}
	}

	log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
	return false
}
// shortenToolNameIfNeeded returns name unchanged when it is at most 64 bytes
// long, and otherwise a shortened form that fits in 64 bytes.
//
// Names that start with "mcp__" (for example "mcp__server-name__tool-name") are
// first reduced to "mcp__" plus the text after the last "__", which keeps the
// prefix and the final segment. Anything still longer than 64 bytes is truncated.
//
// Truncation never splits a multi-byte UTF-8 character: the cut point moves back
// to the start of the character that straddles the limit, so the result can be
// slightly shorter than 64 bytes.
func shortenToolNameIfNeeded(name string) string {
	const limit = 64
	if len(name) <= limit {
		return name
	}

	shortened := name
	if strings.HasPrefix(name, "mcp__") {
		if idx := strings.LastIndex(name, "__"); idx > 0 {
			shortened = "mcp__" + name[idx+2:]
		}
	}
	if len(shortened) <= limit {
		return shortened
	}

	// A UTF-8 character has at most three continuation bytes (10xxxxxx), so
	// stepping back at most three times reaches the start of the character
	// that would otherwise be cut in half.
	cut := limit
	for back := 0; back < 3 && shortened[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return shortened[:cut]
}
// convertClaudeToolsToKiro converts Claude tools to Kiro format
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
var kiroTools []KiroToolWrapper
@@ -336,6 +471,13 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
description := tool.Get("description").String()
inputSchema := tool.Get("input_schema").Value()
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
originalName := name
name = shortenToolNameIfNeeded(name)
if name != originalName {
log.Debugf("kiro: shortened tool name from '%s' to '%s'", originalName, name)
}
// CRITICAL FIX: Kiro API requires non-empty description
if strings.TrimSpace(description) == "" {
description = fmt.Sprintf("Tool: %s", name)
@@ -467,6 +609,34 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
return unique
}
// extractClaudeToolChoiceHint turns the Claude tool_choice parameter into a
// system prompt hint, or returns "" when no hint is needed.
//
// Claude tool_choice values:
//   - {"type": "auto"}: the model decides; no hint
//   - {"type": "any"}: hint that at least one tool must be used
//   - {"type": "tool", "name": "..."}: hint that the named tool must be used
//
// A missing tool_choice, an unknown type, or a "tool" entry without a name
// yields "".
func extractClaudeToolChoiceHint(claudeBody []byte) string {
	toolChoice := gjson.GetBytes(claudeBody, "tool_choice")
	if !toolChoice.Exists() {
		return ""
	}

	switch toolChoice.Get("type").String() {
	case "any":
		return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
	case "tool":
		toolName := toolChoice.Get("name").String()
		if toolName == "" {
			return ""
		}
		// Tool definitions are sent with names passed through
		// shortenToolNameIfNeeded, so the hint must use the same shortened
		// name or it would point at a tool that is not in the list.
		toolName = shortenToolNameIfNeeded(toolName)
		return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
	default:
		// "auto" is the default behavior and unknown types are ignored.
		return ""
	}
}
// BuildUserMessageStruct builds a user message and extracts tool results
func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
content := msg.Get("content")

View File

@@ -12,6 +12,15 @@ const (
// ThinkingEndTag is the end tag for thinking blocks in responses.
ThinkingEndTag = "</thinking>"
// CodeFenceMarker is the markdown code fence marker.
CodeFenceMarker = "```"
// AltCodeFenceMarker is the alternative markdown code fence marker.
AltCodeFenceMarker = "~~~"
// InlineCodeMarker is the markdown inline code marker (backtick).
InlineCodeMarker = "`"
// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
// AWS Kiro API has a 2-3 minute timeout for large file write operations.
KiroAgenticSystemPrompt = `

View File

@@ -156,8 +156,9 @@ func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalReques
}
case "message_stop":
// Final event - emit [DONE]
results = append(results, BuildOpenAISSEDone())
// Final event - do NOT emit [DONE] here
// The handler layer (openai_handlers.go) will send [DONE] when the stream closes
// Emitting [DONE] here would cause duplicate [DONE] markers
case "ping":
// Ping event with usage - optionally emit usage chunk

View File

@@ -29,6 +29,7 @@ type KiroPayload struct {
type KiroInferenceConfig struct {
// MaxTokens is serialized as "maxTokens"; with omitempty it is left out of the JSON when it is 0.
MaxTokens int `json:"maxTokens,omitempty"`
// Temperature is serialized as "temperature"; with omitempty it is left out of the JSON when it is 0.
// NOTE(review): this means an explicitly requested temperature of 0 is never transmitted — confirm that is intended.
Temperature float64 `json:"temperature,omitempty"`
// TopP is serialized as "topP"; with omitempty it is left out of the JSON when it is 0.
// NOTE(review): an explicit top_p of 0 is dropped the same way — confirm that is intended.
TopP float64 `json:"topP,omitempty"`
}
// KiroConversationState holds the conversation context
@@ -134,9 +135,15 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
var maxTokens int64
if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() {
maxTokens = mt.Int()
if maxTokens == -1 {
maxTokens = kiroMaxOutputTokens
log.Debugf("kiro-openai: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
}
}
// Extract temperature if specified
@@ -147,6 +154,15 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
hasTemperature = true
}
// Extract top_p if specified
var topP float64
var hasTopP bool
if tp := gjson.GetBytes(openaiBody, "top_p"); tp.Exists() {
topP = tp.Float()
hasTopP = true
log.Debugf("kiro-openai: extracted top_p: %.2f", topP)
}
// Normalize origin value for Kiro API compatibility
origin = normalizeOrigin(origin)
log.Debugf("kiro-openai: normalized origin value: %s", origin)
@@ -180,6 +196,54 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
systemPrompt += kirocommon.KiroAgenticSystemPrompt
}
// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
// OpenAI tool_choice values: "none", "auto", "required", or {"type":"function","function":{"name":"..."}}
toolChoiceHint := extractToolChoiceHint(openaiBody)
if toolChoiceHint != "" {
if systemPrompt != "" {
systemPrompt += "\n"
}
systemPrompt += toolChoiceHint
log.Debugf("kiro-openai: injected tool_choice hint into system prompt")
}
// Handle response_format parameter - Kiro doesn't support it natively, so we inject system prompt hints
// OpenAI response_format: {"type": "json_object"} or {"type": "json_schema", "json_schema": {...}}
responseFormatHint := extractResponseFormatHint(openaiBody)
if responseFormatHint != "" {
if systemPrompt != "" {
systemPrompt += "\n"
}
systemPrompt += responseFormatHint
log.Debugf("kiro-openai: injected response_format hint into system prompt")
}
// Check for thinking mode and inject thinking hint
// Supports OpenAI reasoning_effort parameter and model name hints
thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
if thinkingEnabled {
// Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
calculatedBudget := maxTokens / 2
if calculatedBudget < 8000 {
calculatedBudget = 8000
}
if calculatedBudget > 24000 {
calculatedBudget = 24000
}
budgetTokens = calculatedBudget
log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
}
if systemPrompt != "" {
systemPrompt += "\n"
}
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
systemPrompt += dynamicThinkingHint
log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
}
// Convert OpenAI tools to Kiro format
kiroTools := convertOpenAIToolsToKiro(tools)
@@ -220,7 +284,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
// Build inferenceConfig if we have any inference parameters
var inferenceConfig *KiroInferenceConfig
if maxTokens > 0 || hasTemperature {
if maxTokens > 0 || hasTemperature || hasTopP {
inferenceConfig = &KiroInferenceConfig{}
if maxTokens > 0 {
inferenceConfig.MaxTokens = int(maxTokens)
@@ -228,6 +292,9 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
if hasTemperature {
inferenceConfig.Temperature = temperature
}
if hasTopP {
inferenceConfig.TopP = topP
}
}
payload := KiroPayload{
@@ -292,6 +359,28 @@ func extractSystemPromptFromOpenAI(messages gjson.Result) string {
return strings.Join(systemParts, "\n")
}
// shortenToolNameIfNeeded returns name unchanged when it is at most 64 bytes
// long, and otherwise a shortened form that fits in 64 bytes.
//
// Names that start with "mcp__" (for example "mcp__server-name__tool-name") are
// first reduced to "mcp__" plus the text after the last "__", which keeps the
// prefix and the final segment. Anything still longer than 64 bytes is truncated.
//
// Truncation never splits a multi-byte UTF-8 character: the cut point moves back
// to the start of the character that straddles the limit, so the result can be
// slightly shorter than 64 bytes.
func shortenToolNameIfNeeded(name string) string {
	const limit = 64
	if len(name) <= limit {
		return name
	}

	shortened := name
	if strings.HasPrefix(name, "mcp__") {
		if idx := strings.LastIndex(name, "__"); idx > 0 {
			shortened = "mcp__" + name[idx+2:]
		}
	}
	if len(shortened) <= limit {
		return shortened
	}

	// A UTF-8 character has at most three continuation bytes (10xxxxxx), so
	// stepping back at most three times reaches the start of the character
	// that would otherwise be cut in half.
	cut := limit
	for back := 0; back < 3 && shortened[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return shortened[:cut]
}
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
var kiroTools []KiroToolWrapper
@@ -314,6 +403,13 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
description := fn.Get("description").String()
parameters := fn.Get("parameters").Value()
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
originalName := name
name = shortenToolNameIfNeeded(name)
if name != originalName {
log.Debugf("kiro-openai: shortened tool name from '%s' to '%s'", originalName, name)
}
// CRITICAL FIX: Kiro API requires non-empty description
if strings.TrimSpace(description) == "" {
description = fmt.Sprintf("Tool: %s", name)
@@ -584,6 +680,153 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul
return finalContent
}
// checkThinkingModeFromOpenAI reports whether the OpenAI-format request asks
// for thinking mode and how many tokens to budget for it.
// Returns (thinkingEnabled, budgetTokens).
//
// The checks run in this order and the first match wins:
//  1. reasoning_effort: any non-empty value other than "none" enables thinking.
//     "low" gives 8000 and "high" gives 32000; "medium", "auto" and any other
//     value keep the 16000 default.
//  2. AMP/Cursor format: the first <thinking_mode>...</thinking_mode> tag in the
//     raw body holds "interleaved" or "enabled". If the body also carries a
//     <max_thinking_length>N</max_thinking_length> tag with a positive integer,
//     N becomes the budget; otherwise the default is kept.
//  3. The model name contains "thinking" or "-reason" (case-insensitive).
//
// When nothing matches the result is (false, 16000).
func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
	var budgetTokens int64 = 16000 // Default budget

	// OpenAI format: reasoning_effort parameter.
	if reasoningEffort := gjson.GetBytes(openaiBody, "reasoning_effort"); reasoningEffort.Exists() {
		effort := reasoningEffort.String()
		if effort != "" && effort != "none" {
			log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
			// Scale the budget with the requested effort; everything that is
			// not "low" or "high" stays on the default.
			switch effort {
			case "low":
				budgetTokens = 8000
			case "high":
				budgetTokens = 32000
			}
			return true, budgetTokens
		}
	}

	// AMP/Cursor format: tags are searched in the raw request text.
	const (
		modeOpen  = "<thinking_mode>"
		modeClose = "</thinking_mode>"
		lenOpen   = "<max_thinking_length>"
		lenClose  = "</max_thinking_length>"
	)
	bodyStr := string(openaiBody)
	if startIdx := strings.Index(bodyStr, modeOpen); startIdx >= 0 {
		startIdx += len(modeOpen)
		if endIdx := strings.Index(bodyStr[startIdx:], modeClose); endIdx >= 0 {
			thinkingMode := bodyStr[startIdx : startIdx+endIdx]
			if thinkingMode == "interleaved" || thinkingMode == "enabled" {
				log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
				// Try to extract max_thinking_length if present.
				if maxLenStart := strings.Index(bodyStr, lenOpen); maxLenStart >= 0 {
					maxLenStart += len(lenOpen)
					if maxLenEnd := strings.Index(bodyStr[maxLenStart:], lenClose); maxLenEnd >= 0 {
						// Parse into a temporary so that zero, negative or
						// unparsable values cannot replace the default budget.
						var requested int64
						n, err := fmt.Sscanf(bodyStr[maxLenStart:maxLenStart+maxLenEnd], "%d", &requested)
						if err == nil && n == 1 && requested > 0 {
							budgetTokens = requested
							log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
						}
					}
				}
				return true, budgetTokens
			}
		}
	}

	// Model name hints.
	model := gjson.GetBytes(openaiBody, "model").String()
	modelLower := strings.ToLower(model)
	if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
		log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
		return true, budgetTokens
	}

	log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
	return false, budgetTokens
}
// extractToolChoiceHint turns the OpenAI tool_choice parameter into a system
// prompt hint, or returns "" when no hint is needed.
//
// OpenAI tool_choice values:
//   - "none": hint that no tool may be used
//   - "auto": the model decides; no hint
//   - "required": hint that at least one tool must be used
//   - {"type":"function","function":{"name":"..."}}: hint that the named tool
//     must be used
//
// A missing tool_choice, any other string, or an object without a function
// name yields "".
func extractToolChoiceHint(openaiBody []byte) string {
	toolChoice := gjson.GetBytes(openaiBody, "tool_choice")
	if !toolChoice.Exists() {
		return ""
	}

	// String form.
	if toolChoice.Type == gjson.String {
		switch toolChoice.String() {
		case "none":
			// The tool list is built elsewhere and cannot be dropped from
			// here, so a strong textual hint is used instead.
			return "[INSTRUCTION: Do NOT use any tools. Respond with text only.]"
		case "required":
			return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
		default:
			// "auto" is the default behavior and unknown strings are ignored.
			return ""
		}
	}

	// Object form: {"type":"function","function":{"name":"..."}}
	if toolChoice.IsObject() && toolChoice.Get("type").String() == "function" {
		if toolName := toolChoice.Get("function.name").String(); toolName != "" {
			// Tool definitions are sent with names passed through
			// shortenToolNameIfNeeded, so the hint must use the same shortened
			// name or it would point at a tool that is not in the list.
			toolName = shortenToolNameIfNeeded(toolName)
			return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
		}
	}

	return ""
}
// extractResponseFormatHint turns the OpenAI response_format parameter into a
// system prompt hint, or returns "" when no hint is needed.
//
// OpenAI response_format values:
//   - {"type": "text"}: default; no hint
//   - {"type": "json_object"}: hint that the reply must be raw, valid JSON
//   - {"type": "json_schema", "json_schema": {...}}: hint that the reply must be
//     JSON matching the schema. The raw schema text is embedded in the hint and
//     cut to at most 500 bytes followed by "..." when longer. Without a schema
//     the json_object hint is used.
//
// A missing response_format or an unknown type yields "".
func extractResponseFormatHint(openaiBody []byte) string {
	const jsonOnlyHint = "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
	const maxSchemaBytes = 500

	responseFormat := gjson.GetBytes(openaiBody, "response_format")
	if !responseFormat.Exists() {
		return ""
	}

	switch responseFormat.Get("type").String() {
	case "json_object":
		return jsonOnlyHint
	case "json_schema":
		schema := responseFormat.Get("json_schema.schema")
		if !schema.Exists() {
			return jsonOnlyHint
		}
		schemaStr := schema.Raw
		if len(schemaStr) > maxSchemaBytes {
			// Cut on a character boundary: a UTF-8 character has at most three
			// continuation bytes (10xxxxxx), so stepping back at most three
			// times avoids leaving half a character in the prompt.
			cut := maxSchemaBytes
			for back := 0; back < 3 && schemaStr[cut]&0xC0 == 0x80; back++ {
				cut--
			}
			schemaStr = schemaStr[:cut] + "..."
		}
		return fmt.Sprintf("[INSTRUCTION: You MUST respond with valid JSON that matches this schema: %s. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]", schemaStr)
	default:
		// "text" is the default behavior and unknown types are ignored.
		return ""
	}
}
// deduplicateToolResults removes duplicate tool results
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
if len(toolResults) == 0 {

View File

@@ -5,7 +5,6 @@ package openai
import (
"encoding/json"
"fmt"
"time"
"github.com/google/uuid"
@@ -34,9 +33,12 @@ func NewOpenAIStreamState(model string) *OpenAIStreamState {
}
}
// FormatSSEEvent formats a JSON payload as an SSE event
// FormatSSEEvent formats a JSON payload for SSE streaming.
// Note: This returns raw JSON data without "data:" prefix.
// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
// to maintain architectural consistency and avoid double-prefix issues.
func FormatSSEEvent(data []byte) string {
return fmt.Sprintf("data: %s", string(data))
return string(data)
}
// BuildOpenAISSETextDelta creates an SSE event for text content delta
@@ -130,9 +132,12 @@ func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) strin
return FormatSSEEvent(result)
}
// BuildOpenAISSEDone creates the final [DONE] SSE event
// BuildOpenAISSEDone creates the final [DONE] SSE event.
// Note: This returns raw "[DONE]" without "data:" prefix.
// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
// to maintain architectural consistency and avoid double-prefix issues.
func BuildOpenAISSEDone() string {
return "data: [DONE]"
return "[DONE]"
}
// buildBaseChunk creates a base chunk structure for streaming