diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 03b9783..7508c60 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,11 +1,8 @@ { "permissions": { "allow": [ - "Bash(dir e:\\\\CodeAI\\\\github\\\\cursor2api:*)", - "Bash(cmd:*)", - "WebFetch(domain:wttr.in)", - "mcp__fetch__fetch", - "mcp__filesystem__directory_tree" + "mcp__filesystem__read_text_file", + "WebSearch" ] } } diff --git a/CHANGELOG.md b/CHANGELOG.md index 180e312..e859e86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +## v2.5.6 (2026-03-12) + +### 🗜️ 渐进式历史压缩 + +- **新策略**:保留最近 6 条消息完整不动,仅压缩早期消息中超过 2000 字符的文本部分 +- 不删除任何消息(保留完整对话结构),只截短单条消息的超长文本 +- 兼顾上下文完整性与输出空间,替代之前被移除的全删式智能压缩 +- 工具描述截断从 200 → **80 字符**(Schema 已包含参数信息,短描述节省输入体积) +- 工具结果截断从 30000 → **15000 字符**(为输出留更多空间) + +### 🔧 续写智能去重 + +- **问题**:模型续写时经常重复截断点附近的内容,拼接后出现重复段落 +- **新增 `deduplicateContinuation()`**:在原内容尾部和续写头部之间搜索最长重叠,自动移除重复部分 +- 支持字符级精确匹配和行级模糊匹配两种去重策略 +- 去重后无新内容时自动停止续写(防止无限循环) +- 流式和非流式路径均已集成 + +### ⚡ 非流式截断续写(与流式路径对齐) + +- **问题**:非流式模式下 Write 大文件等长输出被截断后,Claude Code 直接收到不完整的工具调用 JSON,导致 `tool_use` 退化为纯文本 +- **修复**:非流式路径新增内部截断续写(最多 6 次),与流式路径逻辑完全对齐 +- 新增 `tool_choice=any` 强制重试(非流式):模型未输出工具调用时自动追加强制消息重试 +- 新增极短响应重试(非流式):响应 < 10 字符时自动重试 + +### 📊 Token 估算优化 + +- 提取 `estimateInputTokens()` 为独立函数,Anthropic 和 OpenAI handler 共用 +- 估算比例从 1/4 调整为 **1/3**(更适合中英文混合和代码场景)+ 10% 安全边距 +- 新增工具定义的 token 估算(每个压缩工具签名 ~200 chars + 1000 chars 指令开销) +- 替代之前 `input_tokens: 100` 的硬编码占位符 + +### 🛡️ JSON 解析器加固 + +- **反斜杠计数精确化**:`tolerantParse` 和 `parseToolCalls` 中的字符串状态跟踪从 `escaped` 布尔标志改为**反向计数连续反斜杠**,正确处理 `\\\"` (未转义) vs `\\\\\\\"` (已转义) 等边界情况 +- **新增第五层逆向贪婪提取**:当所有 JSON 修复手段失败时,对 Write/Edit 等工具的 `content`/`command`/`text` 等大值字段进行逆向贪婪提取,从 JSON 末尾向前搜索值的结束引号 +- 小值字段(`file_path`、`path` 等)仍用精确正则提取 + +--- + ## v2.5.5 (2026-03-12) ### 🐛 修复长响应误判为拒绝 diff --git a/README.md b/README.md index 36c98e1..ff4364f 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ - **多层拒绝拦截** - 自动检测和抑制 Cursor 文档助手的拒绝行为(工具和非工具模式均生效) - **三层身份保护** - 身份探针拦截 + 拒绝重试 + 响应清洗,确保输出永远呈现 Claude 身份 - **🆕 截断无缝续写** - Proxy 底层自动拼接被截断的工具响应(代码块/XML未闭合),防止工具调用在长输出中退化为纯文本,彻底代替粗暴的上下文压缩解决失忆问题。 +- **🆕 续写智能去重** - 模型续写时自动检测并移除与截断点重叠的重复内容,防止拼接后出现重复段落 +- **🆕 渐进式历史压缩** - 保留最近6条消息完整,仅截短早期消息超长文本,兼顾上下文完整性与输出空间 - **🆕 Schema 压缩** - 工具定义从完整 JSON Schema (~135k chars) 压缩为紧凑类型签名 (~15k chars),大幅提升 Cursor API 输出预算 - **🆕 JSON 感知解析器** - 正确处理 Write/Edit 工具 content 中的嵌入式代码块,避免工具参数被 markdown ``` 标记截断 - **连续同角色消息自动合并** - 满足 Anthropic API 交替要求,解决 Cursor IDE 发送格式兼容问题 @@ -156,6 +158,28 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use` ## 更新日志 +### v2.5.6 (2026-03-12) — 渐进式压缩 + 续写去重 + 非流式续写对齐 + Token 估算优化 + +**🗜️ 渐进式历史压缩** +- 保留最近 6 条消息完整,仅截短早期超长文本至 2000 字符 +- 工具描述 200→80 chars、工具结果 30k→15k chars,为输出留更多空间 + +**🔧 续写智能去重 `deduplicateContinuation()`** +- 字符级+行级双重去重策略,全部重复时自动停止续写 +- 流式和非流式路径均已集成 + +**⚡ 非流式截断续写(与流式路径对齐)** +- 非流式路径新增内部续写(最多 6 次) +- 新增 `tool_choice=any` 强制重试 + 极短响应重试 + +**📊 Token 估算优化** +- `estimateInputTokens()` 独立函数,两端共用 +- 比例 1/4→1/3 + 10% 安全边距 + 工具定义估算 + +**🛡️ JSON 解析器加固** +- 反斜杠精确计数替代布尔标志 +- 新增第五层逆向贪婪提取大值字段 + ### v2.5.3 (2026-03-11) — Schema 压缩 + JSON 感知解析器 + 续写重写 **Schema 压缩 — 根治截断问题** diff --git a/package.json b/package.json index d0c3086..0166520 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cursor2api", - "version": "2.5.5", + "version": "2.5.6", "description": "Proxy Cursor docs AI to Anthropic Messages API for Claude Code", "type": "module", "scripts": { diff --git a/src/converter.ts b/src/converter.ts index 2c238f6..baaf072 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -76,7 +76,8 @@ function buildToolInstructions( // ★ 使用紧凑 Schema 替代完整 JSON Schema 以大幅减小输入体积 const schema = tool.input_schema ? compactSchema(tool.input_schema) : '{}'; // 截断过长的工具描述(部分客户端的工具描述可达数千字符) - const desc = (tool.description || 'No description').substring(0, 200); + // ★ 80 chars 足矣:Schema 已包含参数信息,短描述减少输入体积,为输出留更多空间 + const desc = (tool.description || 'No description').substring(0, 80); return `- **${tool.name}**: ${desc}\n Params: ${schema}`; }).join('\n'); @@ -287,10 +288,25 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise KEEP_RECENT + 2) { // +2 for few-shot messages + const compressEnd = messages.length - KEEP_RECENT; + for (let i = 2; i < compressEnd; i++) { // 从 index 2 开始跳过 few-shot + const msg = messages[i]; + for (const part of msg.parts) { + if (part.text && part.text.length > EARLY_MSG_MAX_CHARS) { + const originalLen = part.text.length; + part.text = part.text.substring(0, EARLY_MSG_MAX_CHARS) + + `\n\n... [truncated ${originalLen - EARLY_MSG_MAX_CHARS} chars for context budget]`; + console.log(`[Converter] 📦 压缩早期消息 msg[${i}] (${msg.role}): ${originalLen} → ${part.text.length} chars`); + } + } + } + } // 诊断日志:记录发给 Cursor docs AI 的消息摘要 let totalChars = 0; @@ -311,7 +327,8 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise= 0 && fixed[j] === '\\'; j--) { + backslashCount++; + } + if (backslashCount % 2 === 0) { + // 偶数个反斜杠 → 引号未被转义 → 切换字符串状态 + inString = !inString; + } fixed += char; - } else if (char === '"' && !escaped) { - inString = !inString; - fixed += char; - escaped = false; - } else { - if (inString) { - // 裸控制字符转义 - if (char === '\n') { - fixed += '\\n'; - } else if (char === '\r') { - fixed += '\\r'; - } else if (char === '\t') { - fixed += '\\t'; - } else { - fixed += char; - } + continue; + } + + if (inString) { + // 裸控制字符转义 + if (char === '\n') { + fixed += '\\n'; + } else if (char === '\r') { + fixed += '\\r'; + } else if (char === '\t') { + fixed += '\\t'; } else { - // 在字符串外跟踪括号层级 - if (char === '{' || char === '[') { - bracketStack.push(char === '{' ? '}' : ']'); - } else if (char === '}' || char === ']') { - if (bracketStack.length > 0) bracketStack.pop(); - } fixed += char; } - escaped = false; + } else { + // 在字符串外跟踪括号层级 + if (char === '{' || char === '[') { + bracketStack.push(char === '{' ? '}' : ']'); + } else if (char === '}' || char === ']') { + if (bracketStack.length > 0) bracketStack.pop(); + } + fixed += char; } } @@ -530,20 +550,21 @@ function tolerantParse(jsonStr: string): any { let params: Record = {}; if (paramsMatch) { const paramsStr = paramsMatch[1]; - // 逐字符找到 parameters 对象的闭合 } + // 逐字符找到 parameters 对象的闭合 },使用精确反斜杠计数 let depth = 0; let end = -1; let pInString = false; - let pEscaped = false; for (let i = 0; i < paramsStr.length; i++) { const c = paramsStr[i]; - if (c === '\\' && !pEscaped) { pEscaped = true; continue; } - if (c === '"' && !pEscaped) { pInString = !pInString; } + if (c === '"') { + let bsc = 0; + for (let j = i - 1; j >= 0 && paramsStr[j] === '\\'; j--) bsc++; + if (bsc % 2 === 0) pInString = !pInString; + } if (!pInString) { if (c === '{') depth++; if (c === '}') { depth--; if (depth === 0) { end = i; break; } } } - pEscaped = false; } if (end > 0) { const rawParams = paramsStr.substring(0, end + 1); @@ -564,6 +585,67 @@ function tolerantParse(jsonStr: string): any { } } catch { /* ignore */ } + // ★ 第五次尝试:逆向贪婪提取大值字段 + // 专门处理 Write/Edit 工具的 content 参数包含未转义引号导致 JSON 完全损坏的情况 + // 策略:先找到 tool 名,然后对 content/command/text 等大值字段, + // 取该字段 "key": " 后面到最后一个可能的闭合点之间的所有内容 + try { + const toolMatch2 = jsonStr.match(/["'](?:tool|name)["']\s*:\s*["']([^"']+)["']/); + if (toolMatch2) { + const toolName = toolMatch2[1]; + const params: Record = {}; + + // 大值字段列表(这些字段最容易包含有问题的内容) + const bigValueFields = ['content', 'command', 'text', 'new_string', 'new_str', 'file_text', 'code']; + // 小值字段仍用正则精确提取 + const smallFieldRegex = /"(file_path|path|file|old_string|old_str|insert_line|mode|encoding|description|language|name)"\s*:\s*"((?:[^"\\]|\\.)*)"/g; + let sfm; + while ((sfm = smallFieldRegex.exec(jsonStr)) !== null) { + params[sfm[1]] = sfm[2].replace(/\\n/g, '\n').replace(/\\t/g, '\t').replace(/\\\\/g, '\\'); + } + + // 对大值字段进行贪婪提取:从 "content": " 开始,到倒数第二个 " 结束 + for (const field of bigValueFields) { + const fieldStart = jsonStr.indexOf(`"${field}"`); + if (fieldStart === -1) continue; + + // 找到 ": " 后的第一个引号 + const colonPos = jsonStr.indexOf(':', fieldStart + field.length + 2); + if (colonPos === -1) continue; + const valueStart = jsonStr.indexOf('"', colonPos); + if (valueStart === -1) continue; + + // 从末尾逆向查找:跳过可能的 }]} 和空白,找到值的结束引号 + let valueEnd = jsonStr.length - 1; + // 跳过尾部的 }, ], 空白 + while (valueEnd > valueStart && /[}\]\s,]/.test(jsonStr[valueEnd])) { + valueEnd--; + } + // 此时 valueEnd 应该指向值的结束引号 + if (jsonStr[valueEnd] === '"' && valueEnd > valueStart + 1) { + const rawValue = jsonStr.substring(valueStart + 1, valueEnd); + // 尝试解码 JSON 转义序列 + try { + params[field] = JSON.parse(`"${rawValue}"`); + } catch { + // 如果解码失败,做基本替换 + params[field] = rawValue + .replace(/\\n/g, '\n') + .replace(/\\t/g, '\t') + .replace(/\\r/g, '\r') + .replace(/\\\\/g, '\\') + .replace(/\\"/g, '"'); + } + } + } + + if (Object.keys(params).length > 0) { + console.log(`[Converter] tolerantParse 逆向贪婪提取成功: tool=${toolName}, fields=[${Object.keys(params).join(', ')}]`); + return { tool: toolName, parameters: params }; + } + } + } catch { /* ignore */ } + // 全部修复手段失败,重新抛出 throw _e2; } @@ -595,26 +677,23 @@ export function parseToolCalls(responseText: string): { // 从内容起始处向前扫描,跳过 JSON 字符串内部的 ``` let pos = contentStart; let inJsonString = false; - let escaped = false; let closingPos = -1; while (pos < responseText.length - 2) { const char = responseText[pos]; - if (escaped) { - escaped = false; - pos++; - continue; - } - - if (char === '\\' && inJsonString) { - escaped = true; - pos++; - continue; - } - if (char === '"') { - inJsonString = !inJsonString; + // ★ 精确反斜杠计数:计算引号前连续反斜杠的数量 + // 只有奇数个反斜杠时引号才是被转义的 + // 例如: \" → 转义(1个\), \\" → 未转义(2个\), \\\" → 转义(3个\) + let backslashCount = 0; + for (let j = pos - 1; j >= contentStart && responseText[j] === '\\'; j--) { + backslashCount++; + } + if (backslashCount % 2 === 0) { + // 偶数个反斜杠 → 引号未被转义 → 切换字符串状态 + inJsonString = !inJsonString; + } pos++; continue; } diff --git a/src/handler.ts b/src/handler.ts index e1a4f55..08953c9 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -135,18 +135,32 @@ export function listModels(_req: Request, res: Response): void { // ==================== Token 计数 ==================== -export function countTokens(req: Request, res: Response): void { - const body = req.body as AnthropicRequest; +export function estimateInputTokens(body: AnthropicRequest): number { let totalChars = 0; if (body.system) { totalChars += typeof body.system === 'string' ? body.system.length : JSON.stringify(body.system).length; } + for (const msg of body.messages ?? []) { totalChars += typeof msg.content === 'string' ? msg.content.length : JSON.stringify(msg.content).length; } - res.json({ input_tokens: Math.max(1, Math.ceil(totalChars / 4)) }); + // Tool schemas are heavily compressed by compactSchema in converter.ts. + // However, they still consume Cursor's context budget. + // If not counted, Claude CLI will dangerously underestimate context size. + if (body.tools && body.tools.length > 0) { + totalChars += body.tools.length * 200; // ~200 chars per compressed tool signature + totalChars += 1000; // Tool use guidelines and behavior instructions + } + + // Safer estimation for mixed Chinese/English and Code: 1 token ≈ 3 chars + 10% safety margin. + return Math.max(1, Math.ceil((totalChars / 3) * 1.1)); +} + +export function countTokens(req: Request, res: Response): void { + const body = req.body as AnthropicRequest; + res.json({ input_tokens: estimateInputTokens(body) }); } // ==================== 身份探针拦截 ==================== @@ -430,6 +444,79 @@ export function isTruncated(text: string): boolean { return false; } +// ==================== 续写去重 ==================== + +/** + * 续写拼接智能去重 + * + * 模型续写时经常重复截断点附近的内容,导致拼接后出现重复段落。 + * 此函数在 existing 的尾部和 continuation 的头部之间寻找最长重叠, + * 然后返回去除重叠部分的 continuation。 + * + * 算法:从续写内容的头部取不同长度的前缀,检查是否出现在原内容的尾部 + */ +function deduplicateContinuation(existing: string, continuation: string): string { + if (!continuation || !existing) return continuation; + + // 对比窗口:取原内容尾部和续写头部的最大重叠检测范围 + const maxOverlap = Math.min(500, existing.length, continuation.length); + if (maxOverlap < 10) return continuation; // 太短不值得去重 + + const tail = existing.slice(-maxOverlap); + + // 从长到短搜索重叠:找最长的匹配 + let bestOverlap = 0; + for (let len = maxOverlap; len >= 10; len--) { + const prefix = continuation.substring(0, len); + // 检查 prefix 是否出现在 tail 的末尾 + if (tail.endsWith(prefix)) { + bestOverlap = len; + break; + } + } + + // 如果没找到尾部完全匹配的重叠,尝试行级别的去重 + // 场景:模型从某一行的开头重新开始,但截断点可能在行中间 + if (bestOverlap === 0) { + const continuationLines = continuation.split('\n'); + const tailLines = tail.split('\n'); + + // 从续写的第一行开始,在原内容尾部的行中寻找匹配 + if (continuationLines.length > 0 && tailLines.length > 0) { + const firstContLine = continuationLines[0].trim(); + if (firstContLine.length >= 10) { + // 检查续写的前几行是否在原内容尾部出现过 + for (let i = tailLines.length - 1; i >= 0; i--) { + if (tailLines[i].trim() === firstContLine) { + // 从这一行开始往后对比连续匹配的行数 + let matchedLines = 1; + for (let k = 1; k < continuationLines.length && i + k < tailLines.length; k++) { + if (continuationLines[k].trim() === tailLines[i + k].trim()) { + matchedLines++; + } else { + break; + } + } + if (matchedLines >= 2) { + // 移除续写中匹配的行 + const deduped = continuationLines.slice(matchedLines).join('\n'); + console.log(`[Handler] 行级去重: 移除了续写前 ${matchedLines} 行的重复内容`); + return deduped; + } + break; + } + } + } + } + } + + if (bestOverlap > 0) { + return continuation.substring(bestOverlap); + } + + return continuation; +} + // ==================== 重试辅助 ==================== export const MAX_REFUSAL_RETRIES = 2; @@ -487,7 +574,7 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A message: { id, type: 'message', role: 'assistant', content: [], model, stop_reason: null, stop_sequence: null, - usage: { input_tokens: 100, output_tokens: 0 }, + usage: { input_tokens: estimateInputTokens(body), output_tokens: 0 }, }, }); @@ -616,8 +703,20 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener break; } - fullResponse += continuationResponse; - console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${continuationResponse.length})`); + // ★ 智能去重:模型续写时经常重复截断点前的内容 + // 在 fullResponse 末尾和 continuationResponse 开头之间寻找重叠部分并移除 + const deduped = deduplicateContinuation(fullResponse, continuationResponse); + fullResponse += deduped; + if (deduped.length !== continuationResponse.length) { + console.log(`[Handler] 续写去重: 移除了 ${continuationResponse.length - deduped.length} chars 的重复内容`); + } + console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${deduped.length})`); + + // ★ 无进展检测:去重后没有新内容,说明模型在重复自己,继续续写无意义 + if (deduped.trim().length === 0) { + console.log(`[Handler] ⚠️ 续写内容全部为重复,停止续写`); + break; + } } let stopReason = (hasTools && isTruncated(fullResponse)) ? 'max_tokens' : 'end_turn'; @@ -806,6 +905,8 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest): Promise { let fullText = await sendCursorRequestFull(cursorReq); const hasTools = (body.tools?.length ?? 0) > 0; + let activeCursorReq = cursorReq; + let retryCount = 0; console.log(`[Handler] 非流式原始响应 (${fullText.length} chars, tools=${hasTools}): ${fullText.substring(0, 300)}${fullText.length > 300 ? '...' : ''}`); @@ -814,10 +915,11 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body if (shouldRetry()) { for (let attempt = 0; attempt < MAX_REFUSAL_RETRIES; attempt++) { - console.log(`[Handler] 非流式:检测到拒绝(第${attempt + 1}次重试)...原始: ${fullText.substring(0, 100)}`); + retryCount++; + console.log(`[Handler] 非流式:检测到拒绝(第${retryCount}次重试)...原始: ${fullText.substring(0, 100)}`); const retryBody = buildRetryRequest(body, attempt); - const retryCursorReq = await convertToCursorRequest(retryBody); - fullText = await sendCursorRequestFull(retryCursorReq); + activeCursorReq = await convertToCursorRequest(retryBody); + fullText = await sendCursorRequestFull(activeCursorReq); if (!shouldRetry()) break; } if (shouldRetry()) { @@ -834,6 +936,77 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body } } + // ★ 极短响应重试(可能是连接中断) + if (hasTools && fullText.trim().length < 10 && retryCount < MAX_REFUSAL_RETRIES) { + retryCount++; + console.log(`[Handler] 非流式:响应过短 (${fullText.length} chars),重试第${retryCount}次`); + activeCursorReq = await convertToCursorRequest(body); + fullText = await sendCursorRequestFull(activeCursorReq); + console.log(`[Handler] 非流式:重试响应 (${fullText.length} chars): ${fullText.substring(0, 200)}${fullText.length > 200 ? '...' : ''}`); + } + + // ★ 内部截断续写(与流式路径对齐) + // Claude CLI 使用非流式模式时,写大文件最容易被截断 + // 在 proxy 内部完成续写,确保工具调用参数完整 + const MAX_AUTO_CONTINUE = 6; + let continueCount = 0; + const originalMessages = [...activeCursorReq.messages]; + + while (hasTools && isTruncated(fullText) && continueCount < MAX_AUTO_CONTINUE) { + continueCount++; + const prevLength = fullText.length; + console.log(`[Handler] ⚠️ 非流式:内部检测到截断 (${fullText.length} chars),Proxy 将隐式请求无缝续写 (第${continueCount}次)...`); + + const anchorLength = Math.min(300, fullText.length); + const anchorText = fullText.slice(-anchorLength); + + const continuationPrompt = `Your previous response was cut off mid-output. The last part of your output was: + +\`\`\` +...${anchorText} +\`\`\` + +Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`; + + const continuationReq: CursorChatRequest = { + ...activeCursorReq, + messages: [ + ...originalMessages, + { + parts: [{ type: 'text', text: fullText }], + id: uuidv4(), + role: 'assistant', + }, + { + parts: [{ type: 'text', text: continuationPrompt }], + id: uuidv4(), + role: 'user', + }, + ], + }; + + const continuationResponse = await sendCursorRequestFull(continuationReq); + + if (continuationResponse.trim().length === 0) { + console.log(`[Handler] ⚠️ 非流式续写返回空响应,停止续写`); + break; + } + + // ★ 智能去重 + const deduped = deduplicateContinuation(fullText, continuationResponse); + fullText += deduped; + if (deduped.length !== continuationResponse.length) { + console.log(`[Handler] 非流式续写去重: 移除了 ${continuationResponse.length - deduped.length} chars 的重复内容`); + } + console.log(`[Handler] 非流式续写拼接完成: ${prevLength} → ${fullText.length} chars (+${deduped.length})`); + + // ★ 无进展检测:去重后没有新内容,停止续写 + if (deduped.trim().length === 0) { + console.log(`[Handler] ⚠️ 非流式续写内容全部为重复,停止续写`); + break; + } + } + const contentBlocks: AnthropicContentBlock[] = []; // ★ 截断检测:代码块/XML 未闭合时,返回 max_tokens 让 Claude Code 自动继续 let stopReason = (hasTools && isTruncated(fullText)) ? 'max_tokens' : 'end_turn'; @@ -844,6 +1017,42 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body if (hasTools) { let { toolCalls, cleanText } = parseToolCalls(fullText); + // ★ tool_choice=any 强制重试(与流式路径对齐) + const toolChoice = body.tool_choice; + const TOOL_CHOICE_MAX_RETRIES = 2; + let toolChoiceRetry = 0; + while ( + toolChoice?.type === 'any' && + toolCalls.length === 0 && + toolChoiceRetry < TOOL_CHOICE_MAX_RETRIES + ) { + toolChoiceRetry++; + console.log(`[Handler] 非流式:tool_choice=any 但模型未调用工具(第${toolChoiceRetry}次),强制重试...`); + + const forceMessages = [ + ...activeCursorReq.messages, + { + parts: [{ type: 'text' as const, text: fullText || '(no response)' }], + id: uuidv4(), + role: 'assistant' as const, + }, + { + parts: [{ + type: 'text' as const, + text: `Your last response did not include any \`\`\`json action block. This is required because tool_choice is "any". You MUST respond using the json action format for at least one action. Do not explain yourself — just output the action block now.`, + }], + id: uuidv4(), + role: 'user' as const, + }, + ]; + activeCursorReq = { ...activeCursorReq, messages: forceMessages }; + fullText = await sendCursorRequestFull(activeCursorReq); + ({ toolCalls, cleanText } = parseToolCalls(fullText)); + } + if (toolChoice?.type === 'any' && toolCalls.length === 0) { + console.log(`[Handler] 非流式:tool_choice=any 重试${TOOL_CHOICE_MAX_RETRIES}次后仍无工具调用`); + } + if (toolCalls.length > 0) { stopReason = 'tool_use'; @@ -889,9 +1098,9 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body model: body.model, stop_reason: stopReason, stop_sequence: null, - usage: { - input_tokens: 100, - output_tokens: Math.ceil(fullText.length / 4), + usage: { + input_tokens: estimateInputTokens(body), + output_tokens: Math.ceil(fullText.length / 3) }, }; diff --git a/src/openai-handler.ts b/src/openai-handler.ts index b9e932c..ea8e132 100644 --- a/src/openai-handler.ts +++ b/src/openai-handler.ts @@ -36,6 +36,7 @@ import { CLAUDE_IDENTITY_RESPONSE, CLAUDE_TOOLS_RESPONSE, MAX_REFUSAL_RETRIES, + estimateInputTokens, } from './handler.js'; function chatId(): string { @@ -622,9 +623,9 @@ async function handleOpenAINonStream( finish_reason: finishReason, }], usage: { - prompt_tokens: 100, - completion_tokens: Math.ceil(fullText.length / 4), - total_tokens: 100 + Math.ceil(fullText.length / 4), + prompt_tokens: estimateInputTokens(anthropicReq), + completion_tokens: Math.ceil(fullText.length / 3), + total_tokens: estimateInputTokens(anthropicReq) + Math.ceil(fullText.length / 3), }, };