mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-05-08 04:56:04 +08:00
feat(kiro): enhance request translation and fix streaming issues
English: - Fix <thinking> tag parsing: only parse at response start, avoid misinterpreting discussion text - Add token counting support using tiktoken for local estimation - Support top_p parameter in inference config - Handle max_tokens=-1 as maximum (32000 tokens) - Add tool_choice and response_format parameter handling via system prompt hints - Support multiple thinking mode detection formats (Claude API, OpenAI reasoning_effort, AMP/Cursor) - Shorten MCP tool names exceeding 64 characters - Fix duplicate [DONE] marker in OpenAI SSE streaming - Enhance token usage statistics with multiple event format support - Add code fence markers to constants 中文: - 修复 <thinking> 标签解析:仅在响应开头解析,避免误解析讨论文本中的标签 - 使用 tiktoken 实现本地 token 计数功能 - 支持 top_p 推理配置参数 - 处理 max_tokens=-1 转换为最大值(32000 tokens) - 通过系统提示词注入实现 tool_choice 和 response_format 参数支持 - 支持多种思考模式检测格式(Claude API、OpenAI reasoning_effort、AMP/Cursor) - 截断超过64字符的 MCP 工具名称 - 修复 OpenAI SSE 流中重复的 [DONE] 标记 - 增强 token 使用量统计,支持多种事件格式 - 添加代码围栏标记常量
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ type KiroPayload struct {
|
||||
type KiroInferenceConfig struct {
|
||||
MaxTokens int `json:"maxTokens,omitempty"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"topP,omitempty"`
|
||||
}
|
||||
|
||||
// KiroConversationState holds the conversation context
|
||||
@@ -136,9 +137,15 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
||||
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
|
||||
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
|
||||
// Extract max_tokens for potential use in inferenceConfig
|
||||
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
|
||||
const kiroMaxOutputTokens = 32000
|
||||
var maxTokens int64
|
||||
if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() {
|
||||
maxTokens = mt.Int()
|
||||
if maxTokens == -1 {
|
||||
maxTokens = kiroMaxOutputTokens
|
||||
log.Debugf("kiro: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract temperature if specified
|
||||
@@ -149,6 +156,15 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
hasTemperature = true
|
||||
}
|
||||
|
||||
// Extract top_p if specified
|
||||
var topP float64
|
||||
var hasTopP bool
|
||||
if tp := gjson.GetBytes(claudeBody, "top_p"); tp.Exists() {
|
||||
topP = tp.Float()
|
||||
hasTopP = true
|
||||
log.Debugf("kiro: extracted top_p: %.2f", topP)
|
||||
}
|
||||
|
||||
// Normalize origin value for Kiro API compatibility
|
||||
origin = normalizeOrigin(origin)
|
||||
log.Debugf("kiro: normalized origin value: %s", origin)
|
||||
@@ -164,8 +180,26 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
// Extract system prompt
|
||||
systemPrompt := extractSystemPrompt(claudeBody)
|
||||
|
||||
// Check for thinking mode
|
||||
thinkingEnabled, budgetTokens := checkThinkingMode(claudeBody)
|
||||
// Check for thinking mode using the comprehensive IsThinkingEnabled function
|
||||
// This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
|
||||
thinkingEnabled := IsThinkingEnabled(claudeBody)
|
||||
_, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
|
||||
if budgetTokens <= 0 {
|
||||
// Calculate budgetTokens based on max_tokens if available
|
||||
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
|
||||
if maxTokens > 0 {
|
||||
budgetTokens = maxTokens / 2
|
||||
if budgetTokens < 8000 {
|
||||
budgetTokens = 8000
|
||||
}
|
||||
if budgetTokens > 24000 {
|
||||
budgetTokens = 24000
|
||||
}
|
||||
log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
|
||||
} else {
|
||||
budgetTokens = 16000 // Default budget tokens
|
||||
}
|
||||
}
|
||||
|
||||
// Inject timestamp context
|
||||
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
|
||||
@@ -185,6 +219,17 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
systemPrompt += kirocommon.KiroAgenticSystemPrompt
|
||||
}
|
||||
|
||||
// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
|
||||
// Claude tool_choice values: {"type": "auto/any/tool", "name": "..."}
|
||||
toolChoiceHint := extractClaudeToolChoiceHint(claudeBody)
|
||||
if toolChoiceHint != "" {
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
systemPrompt += toolChoiceHint
|
||||
log.Debugf("kiro: injected tool_choice hint into system prompt")
|
||||
}
|
||||
|
||||
// Inject thinking hint when thinking mode is enabled
|
||||
if thinkingEnabled {
|
||||
if systemPrompt != "" {
|
||||
@@ -235,7 +280,7 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
|
||||
// Build inferenceConfig if we have any inference parameters
|
||||
var inferenceConfig *KiroInferenceConfig
|
||||
if maxTokens > 0 || hasTemperature {
|
||||
if maxTokens > 0 || hasTemperature || hasTopP {
|
||||
inferenceConfig = &KiroInferenceConfig{}
|
||||
if maxTokens > 0 {
|
||||
inferenceConfig.MaxTokens = int(maxTokens)
|
||||
@@ -243,6 +288,9 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
if hasTemperature {
|
||||
inferenceConfig.Temperature = temperature
|
||||
}
|
||||
if hasTopP {
|
||||
inferenceConfig.TopP = topP
|
||||
}
|
||||
}
|
||||
|
||||
payload := KiroPayload{
|
||||
@@ -324,6 +372,93 @@ func checkThinkingMode(claudeBody []byte) (bool, int64) {
|
||||
return thinkingEnabled, budgetTokens
|
||||
}
|
||||
|
||||
// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
|
||||
// This is used by the executor to determine whether to parse <thinking> tags in responses.
|
||||
// When thinking is NOT enabled in the request, <thinking> tags in responses should be
|
||||
// treated as regular text content, not as thinking blocks.
|
||||
//
|
||||
// Supports multiple formats:
|
||||
// - Claude API format: thinking.type = "enabled"
|
||||
// - OpenAI format: reasoning_effort parameter
|
||||
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||
func IsThinkingEnabled(body []byte) bool {
|
||||
// Check Claude API format first (thinking.type = "enabled")
|
||||
enabled, _ := checkThinkingMode(body)
|
||||
if enabled {
|
||||
log.Debugf("kiro: IsThinkingEnabled returning true (Claude API format)")
|
||||
return true
|
||||
}
|
||||
|
||||
// Check OpenAI format: reasoning_effort parameter
|
||||
// Valid values: "low", "medium", "high", "auto" (not "none")
|
||||
reasoningEffort := gjson.GetBytes(body, "reasoning_effort")
|
||||
if reasoningEffort.Exists() {
|
||||
effort := reasoningEffort.String()
|
||||
if effort != "" && effort != "none" {
|
||||
log.Debugf("kiro: thinking mode enabled via OpenAI reasoning_effort: %s", effort)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||
// This is how AMP client passes thinking configuration
|
||||
bodyStr := string(body)
|
||||
if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
|
||||
// Extract thinking mode value
|
||||
startTag := "<thinking_mode>"
|
||||
endTag := "</thinking_mode>"
|
||||
startIdx := strings.Index(bodyStr, startTag)
|
||||
if startIdx >= 0 {
|
||||
startIdx += len(startTag)
|
||||
endIdx := strings.Index(bodyStr[startIdx:], endTag)
|
||||
if endIdx >= 0 {
|
||||
thinkingMode := bodyStr[startIdx : startIdx+endIdx]
|
||||
if thinkingMode == "interleaved" || thinkingMode == "enabled" {
|
||||
log.Debugf("kiro: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check OpenAI format: max_completion_tokens with reasoning (o1-style)
|
||||
// Some clients use this to indicate reasoning mode
|
||||
if gjson.GetBytes(body, "max_completion_tokens").Exists() {
|
||||
// If max_completion_tokens is set, check if model name suggests reasoning
|
||||
model := gjson.GetBytes(body, "model").String()
|
||||
if strings.Contains(strings.ToLower(model), "thinking") ||
|
||||
strings.Contains(strings.ToLower(model), "reason") {
|
||||
log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
|
||||
return false
|
||||
}
|
||||
|
||||
// shortenToolNameIfNeeded shortens tool names that exceed 64 characters.
|
||||
// MCP tools often have long names like "mcp__server-name__tool-name".
|
||||
// This preserves the "mcp__" prefix and last segment when possible.
|
||||
func shortenToolNameIfNeeded(name string) string {
|
||||
const limit = 64
|
||||
if len(name) <= limit {
|
||||
return name
|
||||
}
|
||||
// For MCP tools, try to preserve prefix and last segment
|
||||
if strings.HasPrefix(name, "mcp__") {
|
||||
idx := strings.LastIndex(name, "__")
|
||||
if idx > 0 {
|
||||
cand := "mcp__" + name[idx+2:]
|
||||
if len(cand) > limit {
|
||||
return cand[:limit]
|
||||
}
|
||||
return cand
|
||||
}
|
||||
}
|
||||
return name[:limit]
|
||||
}
|
||||
|
||||
// convertClaudeToolsToKiro converts Claude tools to Kiro format
|
||||
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||
var kiroTools []KiroToolWrapper
|
||||
@@ -336,6 +471,13 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||
description := tool.Get("description").String()
|
||||
inputSchema := tool.Get("input_schema").Value()
|
||||
|
||||
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
||||
originalName := name
|
||||
name = shortenToolNameIfNeeded(name)
|
||||
if name != originalName {
|
||||
log.Debugf("kiro: shortened tool name from '%s' to '%s'", originalName, name)
|
||||
}
|
||||
|
||||
// CRITICAL FIX: Kiro API requires non-empty description
|
||||
if strings.TrimSpace(description) == "" {
|
||||
description = fmt.Sprintf("Tool: %s", name)
|
||||
@@ -467,6 +609,34 @@ func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
|
||||
return unique
|
||||
}
|
||||
|
||||
// extractClaudeToolChoiceHint extracts tool_choice from Claude request and returns a system prompt hint.
|
||||
// Claude tool_choice values:
|
||||
// - {"type": "auto"}: Model decides (default, no hint needed)
|
||||
// - {"type": "any"}: Must use at least one tool
|
||||
// - {"type": "tool", "name": "..."}: Must use specific tool
|
||||
func extractClaudeToolChoiceHint(claudeBody []byte) string {
|
||||
toolChoice := gjson.GetBytes(claudeBody, "tool_choice")
|
||||
if !toolChoice.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
toolChoiceType := toolChoice.Get("type").String()
|
||||
switch toolChoiceType {
|
||||
case "any":
|
||||
return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
|
||||
case "tool":
|
||||
toolName := toolChoice.Get("name").String()
|
||||
if toolName != "" {
|
||||
return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
|
||||
}
|
||||
case "auto":
|
||||
// Default behavior, no hint needed
|
||||
return ""
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// BuildUserMessageStruct builds a user message and extracts tool results
|
||||
func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
|
||||
content := msg.Get("content")
|
||||
|
||||
@@ -12,6 +12,15 @@ const (
|
||||
// ThinkingEndTag is the end tag for thinking blocks in responses.
|
||||
ThinkingEndTag = "</thinking>"
|
||||
|
||||
// CodeFenceMarker is the markdown code fence marker.
|
||||
CodeFenceMarker = "```"
|
||||
|
||||
// AltCodeFenceMarker is the alternative markdown code fence marker.
|
||||
AltCodeFenceMarker = "~~~"
|
||||
|
||||
// InlineCodeMarker is the markdown inline code marker (backtick).
|
||||
InlineCodeMarker = "`"
|
||||
|
||||
// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
|
||||
// AWS Kiro API has a 2-3 minute timeout for large file write operations.
|
||||
KiroAgenticSystemPrompt = `
|
||||
|
||||
@@ -156,8 +156,9 @@ func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalReques
|
||||
}
|
||||
|
||||
case "message_stop":
|
||||
// Final event - emit [DONE]
|
||||
results = append(results, BuildOpenAISSEDone())
|
||||
// Final event - do NOT emit [DONE] here
|
||||
// The handler layer (openai_handlers.go) will send [DONE] when the stream closes
|
||||
// Emitting [DONE] here would cause duplicate [DONE] markers
|
||||
|
||||
case "ping":
|
||||
// Ping event with usage - optionally emit usage chunk
|
||||
|
||||
@@ -29,6 +29,7 @@ type KiroPayload struct {
|
||||
type KiroInferenceConfig struct {
|
||||
MaxTokens int `json:"maxTokens,omitempty"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"topP,omitempty"`
|
||||
}
|
||||
|
||||
// KiroConversationState holds the conversation context
|
||||
@@ -134,9 +135,15 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
|
||||
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
|
||||
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
|
||||
// Extract max_tokens for potential use in inferenceConfig
|
||||
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
|
||||
const kiroMaxOutputTokens = 32000
|
||||
var maxTokens int64
|
||||
if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() {
|
||||
maxTokens = mt.Int()
|
||||
if maxTokens == -1 {
|
||||
maxTokens = kiroMaxOutputTokens
|
||||
log.Debugf("kiro-openai: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract temperature if specified
|
||||
@@ -147,6 +154,15 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
hasTemperature = true
|
||||
}
|
||||
|
||||
// Extract top_p if specified
|
||||
var topP float64
|
||||
var hasTopP bool
|
||||
if tp := gjson.GetBytes(openaiBody, "top_p"); tp.Exists() {
|
||||
topP = tp.Float()
|
||||
hasTopP = true
|
||||
log.Debugf("kiro-openai: extracted top_p: %.2f", topP)
|
||||
}
|
||||
|
||||
// Normalize origin value for Kiro API compatibility
|
||||
origin = normalizeOrigin(origin)
|
||||
log.Debugf("kiro-openai: normalized origin value: %s", origin)
|
||||
@@ -180,6 +196,54 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
systemPrompt += kirocommon.KiroAgenticSystemPrompt
|
||||
}
|
||||
|
||||
// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
|
||||
// OpenAI tool_choice values: "none", "auto", "required", or {"type":"function","function":{"name":"..."}}
|
||||
toolChoiceHint := extractToolChoiceHint(openaiBody)
|
||||
if toolChoiceHint != "" {
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
systemPrompt += toolChoiceHint
|
||||
log.Debugf("kiro-openai: injected tool_choice hint into system prompt")
|
||||
}
|
||||
|
||||
// Handle response_format parameter - Kiro doesn't support it natively, so we inject system prompt hints
|
||||
// OpenAI response_format: {"type": "json_object"} or {"type": "json_schema", "json_schema": {...}}
|
||||
responseFormatHint := extractResponseFormatHint(openaiBody)
|
||||
if responseFormatHint != "" {
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
systemPrompt += responseFormatHint
|
||||
log.Debugf("kiro-openai: injected response_format hint into system prompt")
|
||||
}
|
||||
|
||||
// Check for thinking mode and inject thinking hint
|
||||
// Supports OpenAI reasoning_effort parameter and model name hints
|
||||
thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
|
||||
if thinkingEnabled {
|
||||
// Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
|
||||
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
|
||||
if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
|
||||
calculatedBudget := maxTokens / 2
|
||||
if calculatedBudget < 8000 {
|
||||
calculatedBudget = 8000
|
||||
}
|
||||
if calculatedBudget > 24000 {
|
||||
calculatedBudget = 24000
|
||||
}
|
||||
budgetTokens = calculatedBudget
|
||||
log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
|
||||
}
|
||||
|
||||
if systemPrompt != "" {
|
||||
systemPrompt += "\n"
|
||||
}
|
||||
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
|
||||
systemPrompt += dynamicThinkingHint
|
||||
log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
|
||||
}
|
||||
|
||||
// Convert OpenAI tools to Kiro format
|
||||
kiroTools := convertOpenAIToolsToKiro(tools)
|
||||
|
||||
@@ -220,7 +284,7 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
|
||||
// Build inferenceConfig if we have any inference parameters
|
||||
var inferenceConfig *KiroInferenceConfig
|
||||
if maxTokens > 0 || hasTemperature {
|
||||
if maxTokens > 0 || hasTemperature || hasTopP {
|
||||
inferenceConfig = &KiroInferenceConfig{}
|
||||
if maxTokens > 0 {
|
||||
inferenceConfig.MaxTokens = int(maxTokens)
|
||||
@@ -228,6 +292,9 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
if hasTemperature {
|
||||
inferenceConfig.Temperature = temperature
|
||||
}
|
||||
if hasTopP {
|
||||
inferenceConfig.TopP = topP
|
||||
}
|
||||
}
|
||||
|
||||
payload := KiroPayload{
|
||||
@@ -292,6 +359,28 @@ func extractSystemPromptFromOpenAI(messages gjson.Result) string {
|
||||
return strings.Join(systemParts, "\n")
|
||||
}
|
||||
|
||||
// shortenToolNameIfNeeded shortens tool names that exceed 64 characters.
|
||||
// MCP tools often have long names like "mcp__server-name__tool-name".
|
||||
// This preserves the "mcp__" prefix and last segment when possible.
|
||||
func shortenToolNameIfNeeded(name string) string {
|
||||
const limit = 64
|
||||
if len(name) <= limit {
|
||||
return name
|
||||
}
|
||||
// For MCP tools, try to preserve prefix and last segment
|
||||
if strings.HasPrefix(name, "mcp__") {
|
||||
idx := strings.LastIndex(name, "__")
|
||||
if idx > 0 {
|
||||
cand := "mcp__" + name[idx+2:]
|
||||
if len(cand) > limit {
|
||||
return cand[:limit]
|
||||
}
|
||||
return cand
|
||||
}
|
||||
}
|
||||
return name[:limit]
|
||||
}
|
||||
|
||||
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format
|
||||
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||
var kiroTools []KiroToolWrapper
|
||||
@@ -314,6 +403,13 @@ func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||
description := fn.Get("description").String()
|
||||
parameters := fn.Get("parameters").Value()
|
||||
|
||||
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
||||
originalName := name
|
||||
name = shortenToolNameIfNeeded(name)
|
||||
if name != originalName {
|
||||
log.Debugf("kiro-openai: shortened tool name from '%s' to '%s'", originalName, name)
|
||||
}
|
||||
|
||||
// CRITICAL FIX: Kiro API requires non-empty description
|
||||
if strings.TrimSpace(description) == "" {
|
||||
description = fmt.Sprintf("Tool: %s", name)
|
||||
@@ -584,6 +680,153 @@ func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResul
|
||||
return finalContent
|
||||
}
|
||||
|
||||
// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request.
|
||||
// Returns (thinkingEnabled, budgetTokens).
|
||||
// Supports:
|
||||
// - reasoning_effort parameter (low/medium/high/auto)
|
||||
// - Model name containing "thinking" or "reason"
|
||||
// - <thinking_mode> tag in system prompt (AMP/Cursor format)
|
||||
func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
|
||||
var budgetTokens int64 = 16000 // Default budget
|
||||
|
||||
// Check OpenAI format: reasoning_effort parameter
|
||||
// Valid values: "low", "medium", "high", "auto" (not "none")
|
||||
reasoningEffort := gjson.GetBytes(openaiBody, "reasoning_effort")
|
||||
if reasoningEffort.Exists() {
|
||||
effort := reasoningEffort.String()
|
||||
if effort != "" && effort != "none" {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
|
||||
// Adjust budget based on effort level
|
||||
switch effort {
|
||||
case "low":
|
||||
budgetTokens = 8000
|
||||
case "medium":
|
||||
budgetTokens = 16000
|
||||
case "high":
|
||||
budgetTokens = 32000
|
||||
case "auto":
|
||||
budgetTokens = 16000
|
||||
}
|
||||
return true, budgetTokens
|
||||
}
|
||||
}
|
||||
|
||||
// Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||
bodyStr := string(openaiBody)
|
||||
if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
|
||||
startTag := "<thinking_mode>"
|
||||
endTag := "</thinking_mode>"
|
||||
startIdx := strings.Index(bodyStr, startTag)
|
||||
if startIdx >= 0 {
|
||||
startIdx += len(startTag)
|
||||
endIdx := strings.Index(bodyStr[startIdx:], endTag)
|
||||
if endIdx >= 0 {
|
||||
thinkingMode := bodyStr[startIdx : startIdx+endIdx]
|
||||
if thinkingMode == "interleaved" || thinkingMode == "enabled" {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
|
||||
// Try to extract max_thinking_length if present
|
||||
if maxLenStart := strings.Index(bodyStr, "<max_thinking_length>"); maxLenStart >= 0 {
|
||||
maxLenStart += len("<max_thinking_length>")
|
||||
if maxLenEnd := strings.Index(bodyStr[maxLenStart:], "</max_thinking_length>"); maxLenEnd >= 0 {
|
||||
maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
|
||||
if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
|
||||
log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
return true, budgetTokens
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check model name for thinking hints
|
||||
model := gjson.GetBytes(openaiBody, "model").String()
|
||||
modelLower := strings.ToLower(model)
|
||||
if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
|
||||
log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
|
||||
return true, budgetTokens
|
||||
}
|
||||
|
||||
log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
|
||||
return false, budgetTokens
|
||||
}
|
||||
|
||||
// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
|
||||
// OpenAI tool_choice values:
|
||||
// - "none": Don't use any tools
|
||||
// - "auto": Model decides (default, no hint needed)
|
||||
// - "required": Must use at least one tool
|
||||
// - {"type":"function","function":{"name":"..."}} : Must use specific tool
|
||||
func extractToolChoiceHint(openaiBody []byte) string {
|
||||
toolChoice := gjson.GetBytes(openaiBody, "tool_choice")
|
||||
if !toolChoice.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Handle string values
|
||||
if toolChoice.Type == gjson.String {
|
||||
switch toolChoice.String() {
|
||||
case "none":
|
||||
// Note: When tool_choice is "none", we should ideally not pass tools at all
|
||||
// But since we can't modify tool passing here, we add a strong hint
|
||||
return "[INSTRUCTION: Do NOT use any tools. Respond with text only.]"
|
||||
case "required":
|
||||
return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
|
||||
case "auto":
|
||||
// Default behavior, no hint needed
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Handle object value: {"type":"function","function":{"name":"..."}}
|
||||
if toolChoice.IsObject() {
|
||||
if toolChoice.Get("type").String() == "function" {
|
||||
toolName := toolChoice.Get("function.name").String()
|
||||
if toolName != "" {
|
||||
return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractResponseFormatHint extracts response_format from OpenAI request and returns a system prompt hint.
|
||||
// OpenAI response_format values:
|
||||
// - {"type": "text"}: Default, no hint needed
|
||||
// - {"type": "json_object"}: Must respond with valid JSON
|
||||
// - {"type": "json_schema", "json_schema": {...}}: Must respond with JSON matching schema
|
||||
func extractResponseFormatHint(openaiBody []byte) string {
|
||||
responseFormat := gjson.GetBytes(openaiBody, "response_format")
|
||||
if !responseFormat.Exists() {
|
||||
return ""
|
||||
}
|
||||
|
||||
formatType := responseFormat.Get("type").String()
|
||||
switch formatType {
|
||||
case "json_object":
|
||||
return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
|
||||
case "json_schema":
|
||||
// Extract schema if provided
|
||||
schema := responseFormat.Get("json_schema.schema")
|
||||
if schema.Exists() {
|
||||
schemaStr := schema.Raw
|
||||
// Truncate if too long
|
||||
if len(schemaStr) > 500 {
|
||||
schemaStr = schemaStr[:500] + "..."
|
||||
}
|
||||
return fmt.Sprintf("[INSTRUCTION: You MUST respond with valid JSON that matches this schema: %s. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]", schemaStr)
|
||||
}
|
||||
return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
|
||||
case "text":
|
||||
// Default behavior, no hint needed
|
||||
return ""
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// deduplicateToolResults removes duplicate tool results
|
||||
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
|
||||
if len(toolResults) == 0 {
|
||||
|
||||
@@ -5,7 +5,6 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -34,9 +33,12 @@ func NewOpenAIStreamState(model string) *OpenAIStreamState {
|
||||
}
|
||||
}
|
||||
|
||||
// FormatSSEEvent formats a JSON payload as an SSE event
|
||||
// FormatSSEEvent formats a JSON payload for SSE streaming.
|
||||
// Note: This returns raw JSON data without "data:" prefix.
|
||||
// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
|
||||
// to maintain architectural consistency and avoid double-prefix issues.
|
||||
func FormatSSEEvent(data []byte) string {
|
||||
return fmt.Sprintf("data: %s", string(data))
|
||||
return string(data)
|
||||
}
|
||||
|
||||
// BuildOpenAISSETextDelta creates an SSE event for text content delta
|
||||
@@ -130,9 +132,12 @@ func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) strin
|
||||
return FormatSSEEvent(result)
|
||||
}
|
||||
|
||||
// BuildOpenAISSEDone creates the final [DONE] SSE event
|
||||
// BuildOpenAISSEDone creates the final [DONE] SSE event.
|
||||
// Note: This returns raw "[DONE]" without "data:" prefix.
|
||||
// The SSE "data:" prefix is added by the Handler layer (e.g., openai_handlers.go)
|
||||
// to maintain architectural consistency and avoid double-prefix issues.
|
||||
func BuildOpenAISSEDone() string {
|
||||
return "data: [DONE]"
|
||||
return "[DONE]"
|
||||
}
|
||||
|
||||
// buildBaseChunk creates a base chunk structure for streaming
|
||||
|
||||
Reference in New Issue
Block a user