diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 03b9783..7508c60 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -1,11 +1,8 @@
 {
   "permissions": {
     "allow": [
-      "Bash(dir e:\\\\CodeAI\\\\github\\\\cursor2api:*)",
-      "Bash(cmd:*)",
-      "WebFetch(domain:wttr.in)",
-      "mcp__fetch__fetch",
-      "mcp__filesystem__directory_tree"
+      "mcp__filesystem__read_text_file",
+      "WebSearch"
     ]
   }
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 180e312..e859e86 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,45 @@
 # Changelog
 
+## v2.5.6 (2026-03-12)
+
+### 🗜️ 渐进式历史压缩
+
+- **新策略**：保留最近 6 条消息完整不动，仅压缩早期消息中超过 2000 字符的文本部分
+- 不删除任何消息（保留完整对话结构），只截短单条消息的超长文本
+- 兼顾上下文完整性与输出空间，替代之前被移除的全删式智能压缩
+- 工具描述截断从 200 → **80 字符**（Schema 已包含参数信息，短描述节省输入体积）
+- 工具结果截断从 30000 → **15000 字符**（为输出留更多空间）
+
+### 🔧 续写智能去重
+
+- **问题**：模型续写时经常重复截断点附近的内容，拼接后出现重复段落
+- **新增 `deduplicateContinuation()`**：在原内容尾部和续写头部之间搜索最长重叠，自动移除重复部分
+- 支持字符级精确匹配和行级模糊匹配两种去重策略
+- 去重后无新内容时自动停止续写（防止无限循环）
+- 流式和非流式路径均已集成
+
+### ⚡ 非流式截断续写（与流式路径对齐）
+
+- **问题**：非流式模式下 Write 大文件等长输出被截断后，Claude Code 直接收到不完整的工具调用 JSON，导致 `tool_use` 退化为纯文本
+- **修复**：非流式路径新增内部截断续写（最多 6 次），与流式路径逻辑完全对齐
+- 新增 `tool_choice=any` 强制重试（非流式）：模型未输出工具调用时自动追加强制消息重试
+- 新增极短响应重试（非流式）：响应 < 10 字符时自动重试
+
+### 📊 Token 估算优化
+
+- 提取 `estimateInputTokens()` 为独立函数，Anthropic 和 OpenAI handler 共用
+- 估算比例从 1/4 调整为 **1/3**（更适合中英文混合和代码场景）+ 10% 安全边距
+- 新增工具定义的 token 估算（每个压缩工具签名 ~200 chars + 1000 chars 指令开销）
+- 替代之前 `input_tokens: 100` 的硬编码占位符
+
+### 🛡️ JSON 解析器加固
+
+- **反斜杠计数精确化**：`tolerantParse` 和 `parseToolCalls` 中的字符串状态跟踪从 `escaped` 布尔标志改为**反向计数连续反斜杠**，正确处理 `\\"` (偶数个反斜杠，引号未转义) vs `\\\"` (奇数个反斜杠，引号已转义) 等边界情况
+- **新增第五层逆向贪婪提取**：当所有 JSON 修复手段失败时，对 Write/Edit 等工具的 `content`/`command`/`text` 等大值字段进行逆向贪婪提取，从 JSON 末尾向前搜索值的结束引号
+- 小值字段（`file_path`、`path` 等）仍用精确正则提取
+
+---
+
 ## v2.5.5 (2026-03-12)
 
 ### 🐛 修复长响应误判为拒绝
diff --git a/README.md b/README.md
index 36c98e1..ff4364f 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,8 @@
 - **多层拒绝拦截** - 自动检测和抑制 Cursor 文档助手的拒绝行为（工具和非工具模式均生效）
 - **三层身份保护** - 身份探针拦截 + 拒绝重试 + 响应清洗，确保输出永远呈现 Claude 身份
 - **🆕 截断无缝续写** - Proxy 底层自动拼接被截断的工具响应（代码块/XML未闭合），防止工具调用在长输出中退化为纯文本，彻底代替粗暴的上下文压缩解决失忆问题。
+- **🆕 续写智能去重** - 模型续写时自动检测并移除与截断点重叠的重复内容，防止拼接后出现重复段落
+- **🆕 渐进式历史压缩** - 保留最近6条消息完整，仅截短早期消息超长文本，兼顾上下文完整性与输出空间
 - **🆕 Schema 压缩** - 工具定义从完整 JSON Schema (~135k chars) 压缩为紧凑类型签名 (~15k chars)，大幅提升 Cursor API 输出预算
 - **🆕 JSON 感知解析器** - 正确处理 Write/Edit 工具 content 中的嵌入式代码块，避免工具参数被 markdown ``` 标记截断
 - **连续同角色消息自动合并** - 满足 Anthropic API 交替要求，解决 Cursor IDE 发送格式兼容问题
@@ -156,6 +158,28 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use`
 
 ## 更新日志
 
+### v2.5.6 (2026-03-12) — 渐进式压缩 + 续写去重 + 非流式续写对齐 + Token 估算优化
+
+**🗜️ 渐进式历史压缩**
+- 保留最近 6 条消息完整，仅截短早期超长文本至 2000 字符
+- 工具描述 200→80 chars、工具结果 30k→15k chars，为输出留更多空间
+
+**🔧 续写智能去重 `deduplicateContinuation()`**
+- 字符级+行级双重去重策略，全部重复时自动停止续写
+- 流式和非流式路径均已集成
+
+**⚡ 非流式截断续写（与流式路径对齐）**
+- 非流式路径新增内部续写（最多 6 次）
+- 新增 `tool_choice=any` 强制重试 + 极短响应重试
+
+**📊 Token 估算优化**
+- `estimateInputTokens()` 独立函数，两端共用
+- 比例 1/4→1/3 + 10% 安全边距 + 工具定义估算
+
+**🛡️ JSON 解析器加固**
+- 反斜杠精确计数替代布尔标志
+- 新增第五层逆向贪婪提取大值字段
+
 ### v2.5.3 (2026-03-11) — Schema 压缩 + JSON 感知解析器 + 续写重写
 
 **Schema 压缩 — 根治截断问题**
diff --git a/package.json b/package.json
index d0c3086..0166520 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "cursor2api",
-  "version": "2.5.5",
+  "version": "2.5.6",
   "description": "Proxy Cursor docs AI to Anthropic Messages API for Claude Code",
   "type": "module",
   "scripts": {
diff --git a/src/converter.ts b/src/converter.ts
index 2c238f6..baaf072 100644
--- a/src/converter.ts
+++ b/src/converter.ts
@@ -76,7 +76,8 @@ function buildToolInstructions(
         // ★ 使用紧凑 Schema 替代完整 JSON Schema 以大幅减小输入体积
         const schema = tool.input_schema ? compactSchema(tool.input_schema) : '{}';
         // 截断过长的工具描述（部分客户端的工具描述可达数千字符）
-        const desc = (tool.description || 'No description').substring(0, 200);
+        // ★ 80 chars 足矣：Schema 已包含参数信息，短描述减少输入体积，为输出留更多空间
+        const desc = (tool.description || 'No description').substring(0, 80);
         return `- **${tool.name}**: ${desc}\n  Params: ${schema}`;
     }).join('\n');
 
@@ -287,10 +288,25 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
         }
     }
 
-    // ★ 智能压缩已移除：
-    // 用户反馈上下文压缩会丢失旧文件的具体内容，导致模型在多轮对话中陷入死循环或幻觉（例如重复读取文件或误解历史输出）。
-    // 另外，我们已经通过 \`isTruncated\` 自动检测截断并返回 \`max_tokens\`，这也从根源上解决了需要“继续”按钮的问题。
-    // 因此这里不再主动压扁历史记录。
+    // ★ 渐进式历史压缩（替代之前全删的智能压缩）
+    // 策略：保留最近 KEEP_RECENT 条消息完整，仅压缩早期消息中的超长文本
+    // 这不会丢失消息结构（不删消息），只缩短单条消息的文本，兼顾上下文完整性和输出空间
+    const KEEP_RECENT = 6; // 保留最近6条消息不压缩
+    const EARLY_MSG_MAX_CHARS = 2000; // 早期消息的最大字符数
+    if (messages.length > KEEP_RECENT + 2) { // +2 for few-shot messages
+        const compressEnd = messages.length - KEEP_RECENT;
+        for (let i = 2; i < compressEnd; i++) { // 从 index 2 开始跳过 few-shot
+            const msg = messages[i];
+            for (const part of msg.parts) {
+                if (part.text && part.text.length > EARLY_MSG_MAX_CHARS) {
+                    const originalLen = part.text.length;
+                    part.text = part.text.substring(0, EARLY_MSG_MAX_CHARS) +
+                        `\n\n... [truncated ${originalLen - EARLY_MSG_MAX_CHARS} chars for context budget]`;
+                    console.log(`[Converter] 📦 压缩早期消息 msg[${i}] (${msg.role}): ${originalLen} → ${part.text.length} chars`);
+                }
+            }
+        }
+    }
 
     // 诊断日志：记录发给 Cursor docs AI 的消息摘要
     let totalChars = 0;
@@ -311,7 +327,8 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
 }
 
 // 最大工具结果长度（超过则截断，防止上下文溢出）
-const MAX_TOOL_RESULT_LENGTH = 30000;
+// ★ 15000 chars 平衡点：保留足够信息让模型理解结果，同时为输出留空间
+const MAX_TOOL_RESULT_LENGTH = 15000;
 
 
 
@@ -456,42 +473,45 @@ function tolerantParse(jsonStr: string): any {
 
     // 第二次尝试：处理字符串内的裸换行符、制表符
     let inString = false;
-    let escaped = false;
     let fixed = '';
     const bracketStack: string[] = []; // 跟踪 { 和 [ 的嵌套层级
 
     for (let i = 0; i < jsonStr.length; i++) {
         const char = jsonStr[i];
 
-        if (char === '\\' && !escaped) {
-            escaped = true;
+        // ★ 精确反斜杠计数：只有奇数个连续反斜杠后的引号才是转义的
+        if (char === '"') {
+            let backslashCount = 0;
+            for (let j = i - 1; j >= 0 && fixed[j] === '\\'; j--) {
+                backslashCount++;
+            }
+            if (backslashCount % 2 === 0) {
+                // 偶数个反斜杠 → 引号未被转义 → 切换字符串状态
+                inString = !inString;
+            }
             fixed += char;
-        } else if (char === '"' && !escaped) {
-            inString = !inString;
-            fixed += char;
-            escaped = false;
-        } else {
-            if (inString) {
-                // 裸控制字符转义
-                if (char === '\n') {
-                    fixed += '\\n';
-                } else if (char === '\r') {
-                    fixed += '\\r';
-                } else if (char === '\t') {
-                    fixed += '\\t';
-                } else {
-                    fixed += char;
-                }
+            continue;
+        }
+
+        if (inString) {
+            // 裸控制字符转义
+            if (char === '\n') {
+                fixed += '\\n';
+            } else if (char === '\r') {
+                fixed += '\\r';
+            } else if (char === '\t') {
+                fixed += '\\t';
             } else {
-                // 在字符串外跟踪括号层级
-                if (char === '{' || char === '[') {
-                    bracketStack.push(char === '{' ? '}' : ']');
-                } else if (char === '}' || char === ']') {
-                    if (bracketStack.length > 0) bracketStack.pop();
-                }
                 fixed += char;
             }
-            escaped = false;
+        } else {
+            // 在字符串外跟踪括号层级
+            if (char === '{' || char === '[') {
+                bracketStack.push(char === '{' ? '}' : ']');
+            } else if (char === '}' || char === ']') {
+                if (bracketStack.length > 0) bracketStack.pop();
+            }
+            fixed += char;
         }
     }
 
@@ -530,20 +550,21 @@ function tolerantParse(jsonStr: string): any {
                 let params: Record<string, unknown> = {};
                 if (paramsMatch) {
                     const paramsStr = paramsMatch[1];
-                    // 逐字符找到 parameters 对象的闭合 }
+                    // 逐字符找到 parameters 对象的闭合 }，使用精确反斜杠计数
                     let depth = 0;
                     let end = -1;
                     let pInString = false;
-                    let pEscaped = false;
                     for (let i = 0; i < paramsStr.length; i++) {
                         const c = paramsStr[i];
-                        if (c === '\\' && !pEscaped) { pEscaped = true; continue; }
-                        if (c === '"' && !pEscaped) { pInString = !pInString; }
+                        if (c === '"') {
+                            let bsc = 0;
+                            for (let j = i - 1; j >= 0 && paramsStr[j] === '\\'; j--) bsc++;
+                            if (bsc % 2 === 0) pInString = !pInString;
+                        }
                         if (!pInString) {
                             if (c === '{') depth++;
                             if (c === '}') { depth--; if (depth === 0) { end = i; break; } }
                         }
-                        pEscaped = false;
                     }
                     if (end > 0) {
                         const rawParams = paramsStr.substring(0, end + 1);
@@ -564,6 +585,67 @@ function tolerantParse(jsonStr: string): any {
             }
         } catch { /* ignore */ }
 
+        // ★ 第五次尝试：逆向贪婪提取大值字段
+        // 专门处理 Write/Edit 工具的 content 参数包含未转义引号导致 JSON 完全损坏的情况
+        // 策略：先找到 tool 名，然后对 content/command/text 等大值字段，
+        // 取该字段 "key": " 后面到最后一个可能的闭合点之间的所有内容
+        try {
+            const toolMatch2 = jsonStr.match(/["'](?:tool|name)["']\s*:\s*["']([^"']+)["']/);
+            if (toolMatch2) {
+                const toolName = toolMatch2[1];
+                const params: Record<string, unknown> = {};
+
+                // 大值字段列表（这些字段最容易包含有问题的内容）
+                const bigValueFields = ['content', 'command', 'text', 'new_string', 'new_str', 'file_text', 'code'];
+                // 小值字段仍用正则精确提取
+                const smallFieldRegex = /"(file_path|path|file|old_string|old_str|insert_line|mode|encoding|description|language|name)"\s*:\s*"((?:[^"\\]|\\.)*)"/g;
+                let sfm;
+                while ((sfm = smallFieldRegex.exec(jsonStr)) !== null) {
+                    params[sfm[1]] = sfm[2].replace(/\\n/g, '\n').replace(/\\t/g, '\t').replace(/\\\\/g, '\\');
+                }
+
+                // 对大值字段进行贪婪提取：从 "content": " 开始，到倒数第二个 " 结束
+                for (const field of bigValueFields) {
+                    const fieldStart = jsonStr.indexOf(`"${field}"`);
+                    if (fieldStart === -1) continue;
+
+                    // 找到 ": " 后的第一个引号
+                    const colonPos = jsonStr.indexOf(':', fieldStart + field.length + 2);
+                    if (colonPos === -1) continue;
+                    const valueStart = jsonStr.indexOf('"', colonPos);
+                    if (valueStart === -1) continue;
+
+                    // 从末尾逆向查找：跳过可能的 }]} 和空白，找到值的结束引号
+                    let valueEnd = jsonStr.length - 1;
+                    // 跳过尾部的 }, ], 空白
+                    while (valueEnd > valueStart && /[}\]\s,]/.test(jsonStr[valueEnd])) {
+                        valueEnd--;
+                    }
+                    // 此时 valueEnd 应该指向值的结束引号
+                    if (jsonStr[valueEnd] === '"' && valueEnd > valueStart + 1) {
+                        const rawValue = jsonStr.substring(valueStart + 1, valueEnd);
+                        // 尝试解码 JSON 转义序列
+                        try {
+                            params[field] = JSON.parse(`"${rawValue}"`);
+                        } catch {
+                            // 如果解码失败，做基本替换
+                            params[field] = rawValue
+                                .replace(/\\n/g, '\n')
+                                .replace(/\\t/g, '\t')
+                                .replace(/\\r/g, '\r')
+                                .replace(/\\\\/g, '\\')
+                                .replace(/\\"/g, '"');
+                        }
+                    }
+                }
+
+                if (Object.keys(params).length > 0) {
+                    console.log(`[Converter] tolerantParse 逆向贪婪提取成功: tool=${toolName}, fields=[${Object.keys(params).join(', ')}]`);
+                    return { tool: toolName, parameters: params };
+                }
+            }
+        } catch { /* ignore */ }
+
         // 全部修复手段失败，重新抛出
         throw _e2;
     }
@@ -595,26 +677,23 @@ export function parseToolCalls(responseText: string): {
         // 从内容起始处向前扫描，跳过 JSON 字符串内部的 ```
         let pos = contentStart;
         let inJsonString = false;
-        let escaped = false;
         let closingPos = -1;
 
         while (pos < responseText.length - 2) {
             const char = responseText[pos];
 
-            if (escaped) {
-                escaped = false;
-                pos++;
-                continue;
-            }
-
-            if (char === '\\' && inJsonString) {
-                escaped = true;
-                pos++;
-                continue;
-            }
-
             if (char === '"') {
-                inJsonString = !inJsonString;
+                // ★ 精确反斜杠计数：计算引号前连续反斜杠的数量
+                // 只有奇数个反斜杠时引号才是被转义的
+                // 例如: \" → 转义(1个\), \\" → 未转义(2个\), \\\" → 转义(3个\)
+                let backslashCount = 0;
+                for (let j = pos - 1; j >= contentStart && responseText[j] === '\\'; j--) {
+                    backslashCount++;
+                }
+                if (backslashCount % 2 === 0) {
+                    // 偶数个反斜杠 → 引号未被转义 → 切换字符串状态
+                    inJsonString = !inJsonString;
+                }
                 pos++;
                 continue;
             }
diff --git a/src/handler.ts b/src/handler.ts
index e1a4f55..08953c9 100644
--- a/src/handler.ts
+++ b/src/handler.ts
@@ -135,18 +135,32 @@ export function listModels(_req: Request, res: Response): void {
 
 // ==================== Token 计数 ====================
 
-export function countTokens(req: Request, res: Response): void {
-    const body = req.body as AnthropicRequest;
+export function estimateInputTokens(body: AnthropicRequest): number {
     let totalChars = 0;
 
     if (body.system) {
         totalChars += typeof body.system === 'string' ? body.system.length : JSON.stringify(body.system).length;
     }
+    
     for (const msg of body.messages ?? []) {
         totalChars += typeof msg.content === 'string' ? msg.content.length : JSON.stringify(msg.content).length;
     }
 
-    res.json({ input_tokens: Math.max(1, Math.ceil(totalChars / 4)) });
+    // Tool schemas are heavily compressed by compactSchema in converter.ts.
+    // However, they still consume Cursor's context budget. 
+    // If not counted, Claude CLI will dangerously underestimate context size.
+    if (body.tools && body.tools.length > 0) {
+        totalChars += body.tools.length * 200; // ~200 chars per compressed tool signature
+        totalChars += 1000; // Tool use guidelines and behavior instructions
+    }
+    
+    // Safer estimation for mixed Chinese/English and Code: 1 token ≈ 3 chars + 10% safety margin.
+    return Math.max(1, Math.ceil((totalChars / 3) * 1.1));
+}
+
+export function countTokens(req: Request, res: Response): void {
+    const body = req.body as AnthropicRequest;
+    res.json({ input_tokens: estimateInputTokens(body) });
 }
 
 // ==================== 身份探针拦截 ====================
@@ -430,6 +444,79 @@ export function isTruncated(text: string): boolean {
     return false;
 }
 
+// ==================== 续写去重 ====================
+
+/**
+ * Removes from the head of `continuation` the text that repeats the tail of `existing`.
+ * Pass 1: the longest prefix of `continuation` (10..500 chars) that `existing` ends with.
+ * Pass 2 (only if pass 1 finds nothing): a run of >= 2 leading lines that equal, after
+ * trim(), consecutive lines within the last (up to 500) chars of `existing`.
+ * @param existing - text accumulated so far
+ * @param continuation - newly received text to be appended to `existing`
+ * @returns `continuation` with the overlap removed; unchanged when none is found
+ */
+function deduplicateContinuation(existing: string, continuation: string): string {
+    if (!continuation || !existing) return continuation;
+
+    // Comparison window: an overlap is at most 500 chars and no longer than either input
+    const maxOverlap = Math.min(500, existing.length, continuation.length);
+    if (maxOverlap < 10) return continuation; // too short to be worth deduplicating
+
+    const tail = existing.slice(-maxOverlap);
+
+    // Search from longest to shortest so the first hit is the longest overlap
+    let bestOverlap = 0;
+    for (let len = maxOverlap; len >= 10; len--) {
+        const prefix = continuation.substring(0, len);
+        // Does the tail end with this prefix?
+        if (tail.endsWith(prefix)) {
+            bestOverlap = len;
+            break;
+        }
+    }
+
+    // No exact tail overlap found: fall back to line-level deduplication
+    // Scenario: the model restarts from the beginning of a line, while the cut-off may have been mid-line
+    if (bestOverlap === 0) {
+        const continuationLines = continuation.split('\n');
+        const tailLines = tail.split('\n');
+        
+        // Look for the continuation's first line among the lines of the tail window
+        if (continuationLines.length > 0 && tailLines.length > 0) {
+            const firstContLine = continuationLines[0].trim();
+            if (firstContLine.length >= 10) {
+                // Scan the tail lines from last to first for one equal to that first line (after trim)
+                for (let i = tailLines.length - 1; i >= 0; i--) {
+                    if (tailLines[i].trim() === firstContLine) {
+                        // Count how many consecutive lines match from this position onward
+                        let matchedLines = 1;
+                        for (let k = 1; k < continuationLines.length && i + k < tailLines.length; k++) {
+                            if (continuationLines[k].trim() === tailLines[i + k].trim()) {
+                                matchedLines++;
+                            } else {
+                                break;
+                            }
+                        }
+                        if (matchedLines >= 2) {
+                            // Drop the matched leading lines from the continuation
+                            const deduped = continuationLines.slice(matchedLines).join('\n');
+                            console.log(`[Handler] 行级去重: 移除了续写前 ${matchedLines} 行的重复内容`);
+                            return deduped;
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    if (bestOverlap > 0) {
+        return continuation.substring(bestOverlap);
+    }
+
+    return continuation;
+}
+
 // ==================== 重试辅助 ====================
 export const MAX_REFUSAL_RETRIES = 2;
 
@@ -487,7 +574,7 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
         message: {
             id, type: 'message', role: 'assistant', content: [],
             model, stop_reason: null, stop_sequence: null,
-            usage: { input_tokens: 100, output_tokens: 0 },
+            usage: { input_tokens: estimateInputTokens(body), output_tokens: 0 },
         },
     });
 
@@ -616,8 +703,20 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
                 break;
             }
 
-            fullResponse += continuationResponse;
-            console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${continuationResponse.length})`);
+            // ★ 智能去重：模型续写时经常重复截断点前的内容
+            // 在 fullResponse 末尾和 continuationResponse 开头之间寻找重叠部分并移除
+            const deduped = deduplicateContinuation(fullResponse, continuationResponse);
+            fullResponse += deduped;
+            if (deduped.length !== continuationResponse.length) {
+                console.log(`[Handler] 续写去重: 移除了 ${continuationResponse.length - deduped.length} chars 的重复内容`);
+            }
+            console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${deduped.length})`);
+
+            // ★ 无进展检测：去重后没有新内容，说明模型在重复自己，继续续写无意义
+            if (deduped.trim().length === 0) {
+                console.log(`[Handler] ⚠️ 续写内容全部为重复，停止续写`);
+                break;
+            }
         }
 
         let stopReason = (hasTools && isTruncated(fullResponse)) ? 'max_tokens' : 'end_turn';
@@ -806,6 +905,8 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
 async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest): Promise<void> {
     let fullText = await sendCursorRequestFull(cursorReq);
     const hasTools = (body.tools?.length ?? 0) > 0;
+    let activeCursorReq = cursorReq;
+    let retryCount = 0;
 
     console.log(`[Handler] 非流式原始响应 (${fullText.length} chars, tools=${hasTools}): ${fullText.substring(0, 300)}${fullText.length > 300 ? '...' : ''}`);
 
@@ -814,10 +915,11 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
 
     if (shouldRetry()) {
         for (let attempt = 0; attempt < MAX_REFUSAL_RETRIES; attempt++) {
-            console.log(`[Handler] 非流式：检测到拒绝（第${attempt + 1}次重试）...原始: ${fullText.substring(0, 100)}`);
+            retryCount++;
+            console.log(`[Handler] 非流式：检测到拒绝（第${retryCount}次重试）...原始: ${fullText.substring(0, 100)}`);
             const retryBody = buildRetryRequest(body, attempt);
-            const retryCursorReq = await convertToCursorRequest(retryBody);
-            fullText = await sendCursorRequestFull(retryCursorReq);
+            activeCursorReq = await convertToCursorRequest(retryBody);
+            fullText = await sendCursorRequestFull(activeCursorReq);
             if (!shouldRetry()) break;
         }
         if (shouldRetry()) {
@@ -834,6 +936,77 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
         }
     }
 
+    // ★ 极短响应重试（可能是连接中断）
+    if (hasTools && fullText.trim().length < 10 && retryCount < MAX_REFUSAL_RETRIES) {
+        retryCount++;
+        console.log(`[Handler] 非流式：响应过短 (${fullText.length} chars)，重试第${retryCount}次`);
+        activeCursorReq = await convertToCursorRequest(body);
+        fullText = await sendCursorRequestFull(activeCursorReq);
+        console.log(`[Handler] 非流式：重试响应 (${fullText.length} chars): ${fullText.substring(0, 200)}${fullText.length > 200 ? '...' : ''}`);
+    }
+
+    // ★ 内部截断续写（与流式路径对齐）
+    // Claude CLI 使用非流式模式时，写大文件最容易被截断
+    // 在 proxy 内部完成续写，确保工具调用参数完整
+    const MAX_AUTO_CONTINUE = 6;
+    let continueCount = 0;
+    const originalMessages = [...activeCursorReq.messages];
+
+    while (hasTools && isTruncated(fullText) && continueCount < MAX_AUTO_CONTINUE) {
+        continueCount++;
+        const prevLength = fullText.length;
+        console.log(`[Handler] ⚠️ 非流式：内部检测到截断 (${fullText.length} chars)，Proxy 将隐式请求无缝续写 (第${continueCount}次)...`);
+
+        const anchorLength = Math.min(300, fullText.length);
+        const anchorText = fullText.slice(-anchorLength);
+
+        const continuationPrompt = `Your previous response was cut off mid-output. The last part of your output was:
+
+\`\`\`
+...${anchorText}
+\`\`\`
+
+Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
+
+        const continuationReq: CursorChatRequest = {
+            ...activeCursorReq,
+            messages: [
+                ...originalMessages,
+                {
+                    parts: [{ type: 'text', text: fullText }],
+                    id: uuidv4(),
+                    role: 'assistant',
+                },
+                {
+                    parts: [{ type: 'text', text: continuationPrompt }],
+                    id: uuidv4(),
+                    role: 'user',
+                },
+            ],
+        };
+
+        const continuationResponse = await sendCursorRequestFull(continuationReq);
+
+        if (continuationResponse.trim().length === 0) {
+            console.log(`[Handler] ⚠️ 非流式续写返回空响应，停止续写`);
+            break;
+        }
+
+        // ★ 智能去重
+        const deduped = deduplicateContinuation(fullText, continuationResponse);
+        fullText += deduped;
+        if (deduped.length !== continuationResponse.length) {
+            console.log(`[Handler] 非流式续写去重: 移除了 ${continuationResponse.length - deduped.length} chars 的重复内容`);
+        }
+        console.log(`[Handler] 非流式续写拼接完成: ${prevLength} → ${fullText.length} chars (+${deduped.length})`);
+
+        // ★ 无进展检测：去重后没有新内容，停止续写
+        if (deduped.trim().length === 0) {
+            console.log(`[Handler] ⚠️ 非流式续写内容全部为重复，停止续写`);
+            break;
+        }
+    }
+
     const contentBlocks: AnthropicContentBlock[] = [];
     // ★ 截断检测：代码块/XML 未闭合时，返回 max_tokens 让 Claude Code 自动继续
     let stopReason = (hasTools && isTruncated(fullText)) ? 'max_tokens' : 'end_turn';
@@ -844,6 +1017,42 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
     if (hasTools) {
         let { toolCalls, cleanText } = parseToolCalls(fullText);
 
+        // ★ tool_choice=any 强制重试（与流式路径对齐）
+        const toolChoice = body.tool_choice;
+        const TOOL_CHOICE_MAX_RETRIES = 2;
+        let toolChoiceRetry = 0;
+        while (
+            toolChoice?.type === 'any' &&
+            toolCalls.length === 0 &&
+            toolChoiceRetry < TOOL_CHOICE_MAX_RETRIES
+        ) {
+            toolChoiceRetry++;
+            console.log(`[Handler] 非流式：tool_choice=any 但模型未调用工具（第${toolChoiceRetry}次），强制重试...`);
+
+            const forceMessages = [
+                ...activeCursorReq.messages,
+                {
+                    parts: [{ type: 'text' as const, text: fullText || '(no response)' }],
+                    id: uuidv4(),
+                    role: 'assistant' as const,
+                },
+                {
+                    parts: [{
+                        type: 'text' as const,
+                        text: `Your last response did not include any \`\`\`json action block. This is required because tool_choice is "any". You MUST respond using the json action format for at least one action. Do not explain yourself — just output the action block now.`,
+                    }],
+                    id: uuidv4(),
+                    role: 'user' as const,
+                },
+            ];
+            activeCursorReq = { ...activeCursorReq, messages: forceMessages };
+            fullText = await sendCursorRequestFull(activeCursorReq);
+            ({ toolCalls, cleanText } = parseToolCalls(fullText));
+        }
+        if (toolChoice?.type === 'any' && toolCalls.length === 0) {
+            console.log(`[Handler] 非流式：tool_choice=any 重试${TOOL_CHOICE_MAX_RETRIES}次后仍无工具调用`);
+        }
+
         if (toolCalls.length > 0) {
             stopReason = 'tool_use';
 
@@ -889,9 +1098,9 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
         model: body.model,
         stop_reason: stopReason,
         stop_sequence: null,
-        usage: {
-            input_tokens: 100,
-            output_tokens: Math.ceil(fullText.length / 4),
+        usage: { 
+            input_tokens: estimateInputTokens(body), 
+            output_tokens: Math.ceil(fullText.length / 3) 
         },
     };
 
diff --git a/src/openai-handler.ts b/src/openai-handler.ts
index b9e932c..ea8e132 100644
--- a/src/openai-handler.ts
+++ b/src/openai-handler.ts
@@ -36,6 +36,7 @@ import {
     CLAUDE_IDENTITY_RESPONSE,
     CLAUDE_TOOLS_RESPONSE,
     MAX_REFUSAL_RETRIES,
+    estimateInputTokens,
 } from './handler.js';
 
 function chatId(): string {
@@ -622,9 +623,9 @@ async function handleOpenAINonStream(
             finish_reason: finishReason,
         }],
         usage: {
-            prompt_tokens: 100,
-            completion_tokens: Math.ceil(fullText.length / 4),
-            total_tokens: 100 + Math.ceil(fullText.length / 4),
+            prompt_tokens: estimateInputTokens(anthropicReq),
+            completion_tokens: Math.ceil(fullText.length / 3),
+            total_tokens: estimateInputTokens(anthropicReq) + Math.ceil(fullText.length / 3),
         },
     };