fix: 修复截断问题 - Schema压缩+JSON感知解析器+续写机制重写

三个关键修复：

1. Schema 压缩（converter.ts）
   - 新增 compactSchema() 将完整 JSON Schema 压缩为紧凑类型签名
   - 90 个工具的 Schema 从 ~135k chars 降至 ~15k chars
   - 工具描述截断至 200 chars
   - 直接增大 Cursor API 输出预算（输入越小→输出越大）

2. JSON-string-aware 解析器（converter.ts）
   - 替换 parseToolCalls 的 lazy regex 为手动扫描器
   - 正确跳过 JSON 字符串内部的 ``` 标记
   - 修复 Write/Edit 工具 content 含 markdown 代码块时被提前截断的 bug
   - 新增截断代码块恢复（无闭合 ``` 时仍可解析工具调用）

3. 续写机制重写（handler.ts）
   - 续写请求增加 user 引导消息（解决模型返回空响应的问题）
   - 每次基于原始消息快照重建（防止上下文膨胀）
   - 提取最后 300 chars 作为续写锚点
   - 空响应时立即停止，避免无效循环
   - MAX_AUTO_CONTINUE 从 4 提升至 6
2026-06-01 19:39:47 +08:00 · 2026-03-11 10:21:27 +08:00
parent ecf4fa82ee
commit 4b3715700b
2 changed files with 174 additions and 26 deletions
--- a/src/converter.ts
+++ b/src/converter.ts
@@ -25,6 +25,42 @@ import { fixToolCallArguments } from './tool-fixer.js';

 // ==================== 工具指令构建 ====================

+/**
+ * 将 JSON Schema 压缩为紧凑的类型签名
+ * 目的：90 个工具的完整 JSON Schema 约 135,000 chars，压缩后约 15,000 chars
+ * 这直接影响 Cursor API 的输出预算（输入越大，输出越少）
+ *
+ * 示例：
+ *   完整: {"type":"object","properties":{"file_path":{"type":"string","description":"..."},"encoding":{"type":"string","enum":["utf-8","base64"]}},"required":["file_path"]}
+ *   压缩: {file_path!: string, encoding?: utf-8|base64}
+ */
+function compactSchema(schema: Record<string, unknown>): string {
+    if (!schema?.properties) return '{}';
+    const props = schema.properties as Record<string, Record<string, unknown>>;
+    const required = new Set((schema.required as string[]) || []);
+
+    const parts = Object.entries(props).map(([name, prop]) => {
+        let type = (prop.type as string) || 'any';
+        // enum 值直接展示（对正确生成参数至关重要）
+        if (prop.enum) {
+            type = (prop.enum as string[]).join('|');
+        }
+        // 数组类型标注 items 类型
+        if (type === 'array' && prop.items) {
+            const itemType = (prop.items as Record<string, unknown>).type || 'any';
+            type = `${itemType}[]`;
+        }
+        // 嵌套对象简写
+        if (type === 'object' && prop.properties) {
+            type = compactSchema(prop as Record<string, unknown>);
+        }
+        const req = required.has(name) ? '!' : '?';
+        return `${name}${req}: ${type}`;
+    });
+
+    return `{${parts.join(', ')}}`;
+}
+
 /**
 * 将工具定义构建为格式指令
 * 使用 Cursor IDE 原生场景融合：不覆盖模型身份，而是顺应它在 IDE 内的角色
@@ -37,8 +73,11 @@ function buildToolInstructions(
    if (!tools || tools.length === 0) return '';

    const toolList = tools.map((tool) => {
-        const schema = tool.input_schema ? JSON.stringify(tool.input_schema) : '{}';
-        return `- **${tool.name}**: ${tool.description || 'No description'}\n  Schema: ${schema}`;
+        // ★ 使用紧凑 Schema 替代完整 JSON Schema 以大幅减小输入体积
+        const schema = tool.input_schema ? compactSchema(tool.input_schema) : '{}';
+        // 截断过长的工具描述（部分客户端的工具描述可达数千字符）
+        const desc = (tool.description || 'No description').substring(0, 200);
+        return `- **${tool.name}**: ${desc}\n  Params: ${schema}`;
    }).join('\n');

    // ★ tool_choice 强制约束
@@ -530,36 +569,107 @@ function tolerantParse(jsonStr: string): any {
    }
 }

+/**
+ * 从 ```json action 代码块中解析工具调用
+ *
+ * ★ 使用 JSON-string-aware 扫描器替代简单的正则匹配
+ * 原因：Write/Edit 工具的 content 参数经常包含 markdown 代码块（``` 标记），
+ * 简单的 lazy regex `/```json[\s\S]*?```/g` 会在 JSON 字符串内部的 ``` 处提前闭合，
+ * 导致工具参数被截断（例如一个 5000 字的文件只保留前几行）
+ */
 export function parseToolCalls(responseText: string): {
    toolCalls: ParsedToolCall[];
    cleanText: string;
 } {
    const toolCalls: ParsedToolCall[] = [];
-    let cleanText = responseText;
+    const blocksToRemove: Array<{ start: number; end: number }> = [];

-    const fullBlockRegex = /```json(?:\s+action)?\s*([\s\S]*?)\s*```/g;
+    // 查找所有 ```json (action)? 开头的位置
+    const openPattern = /```json(?:\s+action)?/g;
+    let openMatch: RegExpExecArray | null;

-    let match: RegExpExecArray | null;
-    while ((match = fullBlockRegex.exec(responseText)) !== null) {
-        let isToolCall = false;
-        try {
-            const parsed = tolerantParse(match[1]);
-            if (parsed.tool || parsed.name) {
-                const name = parsed.tool || parsed.name;
-                let args = parsed.parameters || parsed.arguments || parsed.input || {};
-                args = fixToolCallArguments(name, args);
-                toolCalls.push({ name, arguments: args });
-                isToolCall = true;
+    while ((openMatch = openPattern.exec(responseText)) !== null) {
+        const blockStart = openMatch.index;
+        const contentStart = blockStart + openMatch[0].length;
+
+        // 从内容起始处向前扫描，跳过 JSON 字符串内部的 ```
+        let pos = contentStart;
+        let inJsonString = false;
+        let escaped = false;
+        let closingPos = -1;
+
+        while (pos < responseText.length - 2) {
+            const char = responseText[pos];
+
+            if (escaped) {
+                escaped = false;
+                pos++;
+                continue;
            }
-        } catch (e) {
-            console.error('[Converter] tolerantParse 失败:', e);
+
+            if (char === '\\' && inJsonString) {
+                escaped = true;
+                pos++;
+                continue;
+            }
+
+            if (char === '"') {
+                inJsonString = !inJsonString;
+                pos++;
+                continue;
+            }
+
+            // 只在 JSON 字符串外部匹配闭合 ```
+            if (!inJsonString && responseText.substring(pos, pos + 3) === '```') {
+                closingPos = pos;
+                break;
+            }
+
+            pos++;
        }

-        if (isToolCall) {
-            cleanText = cleanText.replace(match[0], '');
+        if (closingPos >= 0) {
+            const jsonContent = responseText.substring(contentStart, closingPos).trim();
+            try {
+                const parsed = tolerantParse(jsonContent);
+                if (parsed.tool || parsed.name) {
+                    const name = parsed.tool || parsed.name;
+                    let args = parsed.parameters || parsed.arguments || parsed.input || {};
+                    args = fixToolCallArguments(name, args);
+                    toolCalls.push({ name, arguments: args });
+                    blocksToRemove.push({ start: blockStart, end: closingPos + 3 });
+                }
+            } catch (e) {
+                console.error('[Converter] tolerantParse 失败:', e);
+            }
+        } else {
+            // 没有闭合 ``` — 代码块被截断，尝试解析已有内容
+            const jsonContent = responseText.substring(contentStart).trim();
+            if (jsonContent.length > 10) {
+                try {
+                    const parsed = tolerantParse(jsonContent);
+                    if (parsed.tool || parsed.name) {
+                        const name = parsed.tool || parsed.name;
+                        let args = parsed.parameters || parsed.arguments || parsed.input || {};
+                        args = fixToolCallArguments(name, args);
+                        toolCalls.push({ name, arguments: args });
+                        blocksToRemove.push({ start: blockStart, end: responseText.length });
+                        console.log(`[Converter] ⚠️ 从截断的代码块中恢复工具调用: ${name}`);
+                    }
+                } catch {
+                    console.log(`[Converter] 截断的代码块无法解析为工具调用`);
+                }
+            }
        }
    }

+    // 从后往前移除已解析的代码块，保留 cleanText
+    let cleanText = responseText;
+    for (let i = blocksToRemove.length - 1; i >= 0; i--) {
+        const block = blocksToRemove[i];
+        cleanText = cleanText.substring(0, block.start) + cleanText.substring(block.end);
+    }
+
    return { toolCalls, cleanText: cleanText.trim() };
 }

--- a/src/handler.ts
+++ b/src/handler.ts
@@ -413,12 +413,18 @@ export function isTruncated(text: string): boolean {
    // 代码块未闭合
    const codeBlockOpen = (trimmed.match(/```/g) || []).length % 2 !== 0;
    if (codeBlockOpen) return true;
+    // 检测 ```json action 块已开始但 JSON 对象未闭合（截断发生在工具调用参数中间）
+    const jsonActionBlocks = trimmed.match(/```json\s+action[\s\S]*?```/g) || [];
+    const jsonActionOpens = (trimmed.match(/```json\s+action/g) || []).length;
+    if (jsonActionOpens > jsonActionBlocks.length) return true;
    // XML/HTML 标签未闭合 (Cursor 有时在中途截断)
    const openTags = (trimmed.match(/^<[a-zA-Z]/gm) || []).length;
    const closeTags = (trimmed.match(/^<\/[a-zA-Z]/gm) || []).length;
    if (openTags > closeTags + 1) return true;
    // 以逗号、分号、冒号、开括号结尾（明显未完成）
    if (/[,;:\[{(]\s*$/.test(trimmed)) return true;
+    // 长响应以反斜杠 + n 结尾（JSON 字符串中间被截断）
+    if (trimmed.length > 2000 && /\\n?\s*$/.test(trimmed) && !trimmed.endsWith('```')) return true;
    // 短响应且以小写字母结尾（句子被截断的强烈信号）
    if (trimmed.length < 500 && /[a-z]$/.test(trimmed)) return false; // 短响应不判断
    return false;
@@ -556,21 +562,46 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
        // 流完成后，处理完整响应
        // ★ 内部截断续写：如果模型输出过长被截断（常见于写大文件），Proxy 内部分段续写，然后拼接成完整响应
        // 这样可以确保工具调用（如 Write）不会横跨两次 API 响应而退化为纯文本
-        const MAX_AUTO_CONTINUE = 4;
+        const MAX_AUTO_CONTINUE = 6;
        let continueCount = 0;
        
+        // 保存原始请求的消息快照（不含续写追加的消息）
+        const originalMessages = [...activeCursorReq.messages];
+        
        while (hasTools && isTruncated(fullResponse) && continueCount < MAX_AUTO_CONTINUE) {
            continueCount++;
+            const prevLength = fullResponse.length;
            console.log(`[Handler] ⚠️ 内部检测到截断 (${fullResponse.length} chars)，Proxy 将隐式请求无缝续写 (第${continueCount}次)...`);
            
-            // 构造续写请求：让 assistant 成为最后一条消息，模型会隐式无缝衔接
+            // 提取截断点的最后一段文本作为上下文锚点
+            const anchorLength = Math.min(300, fullResponse.length);
+            const anchorText = fullResponse.slice(-anchorLength);
+            
+            // 构造续写请求：原始消息 + 截断的 assistant 回复 + user 续写引导
+            // 每次重建而非累积，防止上下文膨胀
+            const continuationPrompt = `Your previous response was cut off mid-output. The last part of your output was:
+
+\`\`\`
+...${anchorText}
+\`\`\`
+
+Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
+
            activeCursorReq = {
                ...activeCursorReq,
-                messages: [...activeCursorReq.messages, {
-                    parts: [{ type: 'text', text: fullResponse }],
-                    id: uuidv4(),
-                    role: 'assistant',
-                }],
+                messages: [
+                    ...originalMessages,
+                    {
+                        parts: [{ type: 'text', text: fullResponse }],
+                        id: uuidv4(),
+                        role: 'assistant',
+                    },
+                    {
+                        parts: [{ type: 'text', text: continuationPrompt }],
+                        id: uuidv4(),
+                        role: 'user',
+                    },
+                ],
            };
            
            let continuationResponse = '';
@@ -579,7 +610,14 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
                    continuationResponse += event.delta;
                }
            });
+
+            if (continuationResponse.trim().length === 0) {
+                console.log(`[Handler] ⚠️ 续写返回空响应，停止续写`);
+                break;
+            }
+
            fullResponse += continuationResponse;
+            console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${continuationResponse.length})`);
        }

        let stopReason = (hasTools && isTruncated(fullResponse)) ? 'max_tokens' : 'end_turn';