fix: 修复 thinking 截断时内容泄漏到正文的问题

问题:当模型 thinking 内容超出单次输出上限时,<thinking> 标签未闭合,
导致 thinking 内容被当作正文泄漏给客户端;续写请求中 assistantContext
含未闭合标签,模型不知道思考阶段已结束,继续输出 thinking 而非正文。

修复:
1. splitLeadingThinkingBlocks:未闭合时返回已积累的部分 thinkingContent
   而非空字符串,供调用方正确提取
2. handler.ts / openai-handler.ts:流结束 flush 新增 !complete 分支,
   提取截断的 thinkingContent,不将 thinking 内容 flush 为正文
3. 新增 closeUnclosedThinking:续写前补全缺失的 </thinking> 标签,
   应用于所有 4 处续写 assistantContext 构建,让模型正确从正文续写
4. shouldAutoContinueTruncatedToolResponse:json action 块未闭合时
   跳过 200-char 检查,修复 thinking 剥离后正文过短导致续写不触发的问题

测试:新增 unit-thinking-truncation.mjs(11个单元测试)、
e2e-thinking-truncation.mjs(3个实际 API 请求测试),全部通过
This commit is contained in:
huangzhenting
2026-03-22 14:10:58 +08:00
parent bb86c1a66e
commit f317dc04b0
6 changed files with 438 additions and 14 deletions

View File

@@ -492,8 +492,13 @@ function toolCallNeedsMoreContinuation(toolCall: ParsedToolCall): boolean {
*/
export function shouldAutoContinueTruncatedToolResponse(text: string, hasTools: boolean): boolean {
if (!hasTools || !isTruncated(text)) return false;
// ★ json action 块未闭合是最精确的截断信号,不受长度限制影响
// isTruncated 在有 json action 块时 early return(全闭合→false,未闭合→true),
// 所以此处 isTruncated=true 且有开标签,必然意味着 action 块未闭合,无需重复计数
const hasUnclosedActionBlock = (text.match(/```json\s+action/g) || []).length > 0;
// 响应过短(< 200 chars)时不触发续写,上下文不足会导致模型拒绝或错误续写
if (text.trim().length < 200) return false;
// 例外:json action 块明确未闭合时跳过此检查(thinking 剥离后正文可能很短)
if (!hasUnclosedActionBlock && text.trim().length < 200) return false;
if (!hasToolCalls(text)) return true;
const { toolCalls } = parseToolCalls(text);
@@ -502,6 +507,23 @@ export function shouldAutoContinueTruncatedToolResponse(text: string, hasTools:
return toolCalls.some(toolCallNeedsMoreContinuation);
}
// ==================== 续写辅助 ====================
/**
 * Repairs a dangling `<thinking>` block before text is replayed as assistant
 * context in a continuation request.
 *
 * When the model's output is cut off in the middle of its thinking phase, the
 * raw response ends inside an open `<thinking>` block. Replaying that verbatim
 * makes the model keep emitting thinking instead of continuing the visible
 * answer, so a closing tag is appended to signal that thinking is over.
 *
 * The text is treated as unclosed when either
 *  - it contains more opening than closing tags, or
 *  - its last opening tag comes after its last closing tag. This second check
 *    covers tail windows (e.g. `slice(-2000)`) that begin inside an earlier
 *    thinking block: the orphaned `</thinking>` near the start of the window
 *    would otherwise balance the count and hide the unclosed trailing block.
 *
 * A window that starts inside the unclosed block itself (no opening tag
 * visible at all) cannot be detected here and is returned unchanged.
 *
 * @param text Assistant text that may end inside a `<thinking>` block.
 * @returns `text` unchanged, or `text` followed by `</thinking>\n`.
 */
function closeUnclosedThinking(text: string): string {
    const OPEN_TAG = '<thinking>';
    const CLOSE_TAG = '</thinking>';

    // The tags cannot overlap themselves, so split-counting equals a global match count.
    const openCount = text.split(OPEN_TAG).length - 1;
    const closeCount = text.split(CLOSE_TAG).length - 1;

    const lastOpen = text.lastIndexOf(OPEN_TAG);
    const lastClose = text.lastIndexOf(CLOSE_TAG);
    const tailIsOpen = lastOpen !== -1 && lastOpen > lastClose;

    if (openCount > closeCount || tailIsOpen) return text + CLOSE_TAG + '\n';
    return text;
}
// ==================== 续写去重 ====================
/**
@@ -599,9 +621,11 @@ export async function autoContinueCursorToolResponseStream(
Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
const assistantContext = fullResponse.length > 2000
? '...\n' + fullResponse.slice(-2000)
: fullResponse;
const assistantContext = closeUnclosedThinking(
fullResponse.length > 2000
? '...\n' + fullResponse.slice(-2000)
: fullResponse,
);
const continuationReq: CursorChatRequest = {
...cursorReq,
@@ -671,9 +695,11 @@ export async function autoContinueCursorToolResponseFull(
Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
const assistantContext = fullText.length > 2000
? '...\n' + fullText.slice(-2000)
: fullText;
const assistantContext = closeUnclosedThinking(
fullText.length > 2000
? '...\n' + fullText.slice(-2000)
: fullText,
);
const continuationReq: CursorChatRequest = {
...cursorReq,
@@ -917,6 +943,11 @@ async function handleDirectTextStream(
if (split.startedWithThinking && split.complete) {
thinkingContent = split.thinkingContent;
flushVisible(split.remainder);
} else if (split.startedWithThinking && !split.complete) {
// ★ thinking 未闭合(输出被截断在 thinking 阶段)
// 提取已积累的部分 thinking 内容,正文为空,避免 <thinking>...内容泄漏到正文
thinkingContent = split.thinkingContent;
// remainder 为空,不 flush 任何正文内容
} else {
flushVisible(leadingBuffer);
}
@@ -1225,6 +1256,11 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
if (split.startedWithThinking && split.complete) {
hybridThinkingContent = split.thinkingContent;
pushToStreamer(split.remainder);
} else if (split.startedWithThinking && !split.complete) {
// ★ thinking 未闭合(输出被截断在 thinking 阶段)
// 提取部分 thinking 内容,不 push 到正文流,避免泄漏
hybridThinkingContent = split.thinkingContent;
// remainder 为空,不 push 任何正文内容
} else {
pushToStreamer(hybridLeadingBuffer);
}
@@ -1366,9 +1402,11 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
const assistantContext = fullResponse.length > 2000
? '...\n' + fullResponse.slice(-2000)
: fullResponse;
const assistantContext = closeUnclosedThinking(
fullResponse.length > 2000
? '...\n' + fullResponse.slice(-2000)
: fullResponse,
);
activeCursorReq = {
...activeCursorReq,
@@ -1822,7 +1860,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
messages: [
// ★ 续写优化:丢弃所有工具定义和历史消息
{
parts: [{ type: 'text', text: fullText.length > 2000 ? '...\n' + fullText.slice(-2000) : fullText }],
parts: [{ type: 'text', text: closeUnclosedThinking(fullText.length > 2000 ? '...\n' + fullText.slice(-2000) : fullText) }],
id: uuidv4(),
role: 'assistant',
},

View File

@@ -892,6 +892,11 @@ async function handleOpenAIStream(
if (split.startedWithThinking && split.complete) {
hybridThinkingContent = split.thinkingContent;
pushToStreamer(split.remainder);
} else if (split.startedWithThinking && !split.complete) {
// ★ thinking 未闭合(输出被截断在 thinking 阶段)
// 提取部分 thinking 内容,不 push 到正文流,避免泄漏
hybridThinkingContent = split.thinkingContent;
// remainder 为空,不 push 任何正文内容
} else {
pushToStreamer(hybridLeadingBuffer);
}

View File

@@ -98,10 +98,14 @@ export function splitLeadingThinkingBlocks(text: string): LeadingThinkingSplit {
while (cursor.startsWith(THINKING_OPEN)) {
const closeIndex = cursor.indexOf(THINKING_CLOSE, THINKING_OPEN.length);
if (closeIndex === -1) {
// ★ 未闭合(截断):返回截断前已积累的部分 thinking 内容
// 当前未闭合块的内容 + 前面已完整的块(如有多个连续 thinking 块的情况)
const partialContent = cursor.slice(THINKING_OPEN.length).trim();
const allParts = [...thinkingParts, ...(partialContent ? [partialContent] : [])];
return {
startedWithThinking: true,
complete: false,
thinkingContent: '',
thinkingContent: allParts.join('\n\n'),
remainder: '',
};
}

View File

@@ -0,0 +1,217 @@
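/**
 * e2e-thinking-truncation.mjs
 *
 * End-to-end tests (real API requests) for the thinking-truncation scenarios.
 *
 * Test scenarios:
 * 1. Thinking mode: a thinking block is returned and its tags do not leak into the text.
 * 2. Request without the `thinking` parameter: <thinking> tags must still not leak into the text.
 * 3. Tools + thinking: no thinking tags in the text, and any tool call carries complete, parseable JSON input.
 */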
import http from 'http';
const BASE = process.env.BASE_URL || 'http://localhost:3010';
const url = new URL(BASE);
let passed = 0;
let failed = 0;
/**
 * Sends a streaming POST to `/v1/messages` on the configured server and
 * collects the JSON payloads of the SSE `data: ` lines.
 *
 * @param name Label for the request; not used by the function itself.
 * @param body Request payload, serialized with JSON.stringify.
 * @param timeoutMs Overall deadline; on expiry the promise rejects and the request is aborted.
 * @returns Promise resolving to the array of parsed events once the response ends.
 *          `data: ` lines whose payload is not valid JSON are skipped.
 */
function runAnthropicTest(name, body, timeoutMs = 120000) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
            reject(new Error(`超时 ${timeoutMs}ms`));
            // Abort the in-flight request so the socket does not keep the process alive.
            // Any error emitted by the abort hits an already-settled promise and is ignored.
            req.destroy();
        }, timeoutMs);
        const data = JSON.stringify(body);
        const events = [];
        const collect = (line) => {
            if (!line.startsWith('data: ')) return;
            try { events.push(JSON.parse(line.slice(6).trim())); } catch { /* skip non-JSON payloads */ }
        };
        const req = http.request({
            hostname: url.hostname, port: url.port || 3010, path: '/v1/messages', method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': 'test',
                'anthropic-version': '2023-06-01',
                'Content-Length': Buffer.byteLength(data),
            },
        }, (res) => {
            // Decode at the stream level so a multi-byte UTF-8 character split
            // across two chunks is not turned into replacement characters.
            res.setEncoding('utf8');
            let buf = '';
            res.on('data', chunk => {
                buf += chunk;
                const lines = buf.split('\n');
                // The last element is an incomplete line (or ''); keep it for the next chunk.
                buf = lines.pop() ?? '';
                lines.forEach(collect);
            });
            res.on('end', () => {
                clearTimeout(timer);
                // A final line without a trailing newline is still a complete event.
                if (buf) collect(buf);
                resolve(events);
            });
            res.on('error', err => { clearTimeout(timer); reject(err); });
        });
        req.on('error', err => { clearTimeout(timer); reject(err); });
        req.write(data);
        req.end();
    });
}
/**
 * Folds a list of parsed SSE events into three accumulated values.
 *
 * - `thinkingContent`: concatenation of every `thinking_delta` payload.
 * - `textContent`: concatenation of every `text_delta` payload.
 * - `stopReason`: `stop_reason` of the last `message_delta` event ('' when absent).
 *
 * @param events Iterable of event objects with a `type` and an optional `delta`.
 * @returns `{ thinkingContent, textContent, stopReason }`.
 */
function parseEvents(events) {
    const summary = { thinkingContent: '', textContent: '', stopReason: '' };
    for (const ev of events) {
        switch (ev.type) {
            case 'content_block_delta': {
                const deltaKind = ev.delta?.type;
                if (deltaKind === 'thinking_delta') {
                    summary.thinkingContent += ev.delta.thinking || '';
                } else if (deltaKind === 'text_delta') {
                    summary.textContent += ev.delta.text || '';
                }
                break;
            }
            case 'message_delta':
                // Later message_delta events overwrite earlier ones.
                summary.stopReason = ev.delta?.stop_reason || '';
                break;
            default:
                break;
        }
    }
    return summary;
}
/**
 * Runs one named async test case and records the outcome in the module-level
 * `passed` / `failed` counters. A failure is logged and counted, never
 * rethrown, so the remaining cases still run.
 *
 * @param name Test title printed on success and on failure.
 * @param fn Async test body; a rejection or throw marks the case as failed.
 */
async function test(name, fn) {
    try {
        await fn();
        console.log(`${name}`);
        passed++;
    } catch (err) {
        // Anything can be thrown, not only Error instances; without this guard a
        // thrown string or object would be reported as "undefined".
        const message = err instanceof Error ? err.message : String(err);
        console.error(`${name}`);
        console.error(` ${message}`);
        failed++;
    }
}
/**
 * Minimal assertion helper: throws an Error when `cond` is falsy.
 *
 * @param cond Value expected to be truthy.
 * @param msg Error message; defaults to 'Assertion failed'.
 */
function assert(cond, msg) {
    if (!cond) throw new Error(msg || 'Assertion failed');
}
// Tool definitions passed as `tools` in the tools + thinking request:
// a `Write` tool (required: file_path, content) and a `Read` tool (required: file_path),
// each described by an `input_schema` object.
const TOOLS = [
{
name: 'Write',
description: 'Write a file',
input_schema: {
type: 'object',
properties: {
file_path: { type: 'string' },
content: { type: 'string' },
},
required: ['file_path', 'content'],
},
},
{
name: 'Read',
description: 'Read a file',
input_schema: {
type: 'object',
properties: { file_path: { type: 'string' } },
required: ['file_path'],
},
},
];
console.log('\n📦 E2E: thinking 截断场景测试\n');
console.log(` 服务地址: ${BASE}`);
console.log(` 注意:以下测试需要模型实际支持 thinking 模式\n`);
// ==================== Test 1: basic thinking-mode check ====================
// Sends a streaming request with thinking enabled and checks that thinking
// deltas are present, that no <thinking> / </thinking> tag appears in the text
// deltas, and that the text is non-empty.
// NOTE(review): the test title says the thinking block comes before the text,
// but the assertions below only check presence, not ordering.
await test('thinking 模式thinking block 出现在正文之前,不泄漏到 text', async () => {
const events = await runAnthropicTest('thinking-basic', {
model: 'claude-sonnet-4-6-thinking',
max_tokens: 16000,
thinking: { type: 'enabled', budget_tokens: 10000 },
messages: [{
role: 'user',
content: '简单回答1+1等于几',
}],
stream: true,
});
const { thinkingContent, textContent } = parseEvents(events);
// A thinking block must be present.
assert(thinkingContent.length > 0, `期望有 thinking block实际为空`);
// Thinking tags must not show up in the visible text.
assert(
!textContent.includes('<thinking>'),
`正文不应包含 <thinking> 标签,实际正文: ${textContent.substring(0, 200)}`,
);
assert(
!textContent.includes('</thinking>'),
`正文不应包含 </thinking> 标签`,
);
// The visible text must have real content.
assert(textContent.trim().length > 0, `正文应有内容,实际为空`);
console.log(` thinking: ${thinkingContent.length} chars, text: ${textContent.length} chars`);
});
// ==================== Test 2: thinking does not leak into text (no `thinking` parameter) ====================
// Sends a streaming request without the `thinking` parameter and checks that
// neither <thinking> nor </thinking> appears in the text deltas.
await test('非 thinking 模式:即使模型输出 <thinking> 也不泄漏到正文', async () => {
// NOTE(review): the original comment here said a regular model name plus a
// system prompt is used to induce <thinking> tags. The request below actually
// uses the `-thinking` model name and sends no system prompt; only the
// `thinking` parameter is omitted. Confirm which setup is intended.
const events = await runAnthropicTest('thinking-leak', {
model: 'claude-sonnet-4-6-thinking',
max_tokens: 8000,
// `thinking` parameter intentionally not sent.
messages: [{
role: 'user',
content: '请用中文简短回答:什么是递归?',
}],
stream: true,
});
const { textContent } = parseEvents(events);
assert(
!textContent.includes('<thinking>'),
`正文不应包含 <thinking> 开标签,实际: ${textContent.substring(0, 300)}`,
);
assert(
!textContent.includes('</thinking>'),
`正文不应包含 </thinking> 闭标签`,
);
console.log(` text: ${textContent.length} chars, preview: ${textContent.substring(0, 80).replace(/\n/g, '\\n')}`);
});
// ==================== Test 3: tools + thinking, tool call returned completely ====================
// Sends a streaming request with thinking enabled and the TOOLS definitions,
// then checks that no thinking tag leaks into the text and that, when a tool
// call is present, its streamed JSON input parses and has string
// `file_path` / `content` fields.
await test('thinking + 工具调用工具参数完整thinking 不泄漏', async () => {
const events = await runAnthropicTest('thinking-tools', {
model: 'claude-sonnet-4-6-thinking',
max_tokens: 16000,
thinking: { type: 'enabled', budget_tokens: 8000 },
tools: TOOLS,
messages: [{
role: 'user',
content: '请用 Write 工具写一个包含 50 行注释的 Python hello world 文件到 /tmp/hello.py',
}],
stream: true,
});
const { thinkingContent, textContent } = parseEvents(events);
// Collect tool-call events.
// NOTE(review): input_json_delta fragments from ALL tool_use blocks are joined
// into one string, so a response with more than one tool call would not parse.
const toolStarts = events.filter(e => e.type === 'content_block_start' && e.content_block?.type === 'tool_use');
const toolInputDeltas = events.filter(e => e.type === 'content_block_delta' && e.delta?.type === 'input_json_delta');
const toolInputRaw = toolInputDeltas.map(e => e.delta.partial_json || '').join('');
assert(
!textContent.includes('<thinking>') && !textContent.includes('</thinking>'),
`正文不应包含 thinking 标签,实际: ${textContent.substring(0, 200)}`,
);
if (toolStarts.length > 0) {
// Tool call present: its input must be complete, i.e. parse as valid JSON.
let toolInput = {};
try { toolInput = JSON.parse(toolInputRaw); } catch (e) {
throw new Error(`工具调用参数 JSON 解析失败: ${e.message}\n原始: ${toolInputRaw.substring(0, 200)}`);
}
assert(typeof toolInput.file_path === 'string', '工具参数应包含 file_path');
assert(typeof toolInput.content === 'string', '工具参数应包含 content');
console.log(` thinking: ${thinkingContent.length} chars, tool: ${toolStarts[0]?.content_block?.name}, content: ${toolInput.content?.length} chars`);
} else {
// No tool call: there must at least be visible text.
assert(textContent.trim().length > 0, '无工具调用时正文不应为空');
console.log(` thinking: ${thinkingContent.length} chars, text: ${textContent.length} chars (无工具调用)`);
}
});
// ==================== Summary ====================
// Print the pass/fail counts and exit with status 1 if any case failed.
console.log(`\n结果:${passed} 通过,${failed} 失败\n`);
if (failed > 0) process.exit(1);

View File

@@ -61,13 +61,26 @@ test('大参数写入工具仍然继续续写', () => {
);
});
test('无工具代码块但文本明显截断时继续续写', () => {
test('普通代码块截断但文本过短(<200字续写', () => {
// 200-char 保护:非 json action 块截断时,过短的响应缺乏上下文,不触发续写
const text = '```ts\nexport const answer = {';
assertEqual(
shouldAutoContinueTruncatedToolResponse(text, true),
false,
'非 json action 块且文本 <200 chars 时不应续写',
);
});
test('json action 块未闭合且文本过短时仍触发续写thinking 剥离后场景)', () => {
// 场景thinking 剥离后 fullResponse 只剩 json action 块开头(很短)
// 200-char 保护不应阻止这种明确的工具调用截断
const text = '```json action\n{\n "tool": "Write",';
assertEqual(
shouldAutoContinueTruncatedToolResponse(text, true),
true,
'未形成可恢复工具调用时应继续续写',
'json action 块未闭合时即使文本 <200 chars 也应续写',
);
});

View File

@@ -0,0 +1,147 @@
/**
* unit-thinking-truncation.mjs
*
* 测试 thinking 截断场景下的修复逻辑:
* 1. splitLeadingThinkingBlocks 未闭合时返回部分 thinkingContent而非空字符串
* 2. closeUnclosedThinking 在 assistantContext 中补全缺失的 </thinking> 标签
*/
import { splitLeadingThinkingBlocks } from '../dist/streaming-text.js';
// closeUnclosedThinking is a handler-internal function that is not exported,
// so a local copy of its logic is inlined here for white-box testing.
/**
 * Appends `</thinking>\n` when the text contains more opening than closing
 * thinking tags; otherwise returns the text unchanged.
 */
function closeUnclosedThinking(text) {
    const countOf = (pattern) => (text.match(pattern) || []).length;
    const hasDanglingOpen = countOf(/<thinking>/g) > countOf(/<\/thinking>/g);
    return hasDanglingOpen ? text + '</thinking>\n' : text;
}
let passed = 0;
let failed = 0;
/**
 * Runs one named synchronous test case and updates the module-level
 * `passed` / `failed` counters. A thrown value is reported via console.error
 * (its `message` for Error instances, `String(value)` otherwise) and is not rethrown.
 *
 * @param name Test title printed on success and on failure.
 * @param fn Synchronous test body; throwing marks the case as failed.
 */
function test(name, fn) {
    try {
        fn();
        console.log(`${name}`);
        passed += 1;
    } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        [`${name}`, ` ${message}`].forEach((line) => console.error(line));
        failed += 1;
    }
}
/**
 * Throws unless `actual` is strictly equal (`===`) to `expected`.
 *
 * @param actual Value produced by the code under test.
 * @param expected Value it must be identical to.
 * @param message Optional error message; when falsy, a message built from the
 *                JSON forms of both values is used.
 */
function assertEqual(actual, expected, message) {
    if (actual === expected) return;
    throw new Error(
        message ? message : `Expected ${JSON.stringify(expected)}, got ${JSON.stringify(actual)}`,
    );
}
/**
 * Throws unless `actual.includes(substring)` is true.
 *
 * @param actual Value searched via its `includes` method.
 * @param substring Value that must be found.
 * @param message Optional error message; when falsy, a message built from the
 *                JSON forms of both values is used.
 */
function assertContains(actual, substring, message) {
    if (actual.includes(substring)) return;
    throw new Error(
        message
            ? message
            : `Expected string to contain ${JSON.stringify(substring)}, got ${JSON.stringify(actual)}`,
    );
}
// ==================== splitLeadingThinkingBlocks tests ====================
// Cases for the result fields startedWithThinking / complete / thinkingContent /
// remainder on closed, unclosed (truncated), empty, multi-block and tag-free input.
console.log('\n📦 splitLeadingThinkingBlocks — thinking 截断处理\n');
// Closed block: content is extracted and the text after it becomes the remainder.
test('完整 thinking 块complete=true正确提取内容', () => {
const text = '<thinking>\n我在思考这道题\n</thinking>\n这是正文';
const result = splitLeadingThinkingBlocks(text);
assertEqual(result.startedWithThinking, true, 'startedWithThinking');
assertEqual(result.complete, true, 'complete');
assertEqual(result.thinkingContent, '我在思考这道题', 'thinkingContent');
assertEqual(result.remainder, '这是正文', 'remainder');
});
// Unclosed block: complete=false, partial thinking text is returned, remainder stays empty.
test('thinking 未闭合截断complete=false仍返回部分 thinkingContent', () => {
const text = '<thinking>\n开始深入分析这个问题考虑各种边界情况……';
const result = splitLeadingThinkingBlocks(text);
assertEqual(result.startedWithThinking, true, 'startedWithThinking');
assertEqual(result.complete, false, 'complete 应为 false');
// Before the fix thinkingContent was ''; after the fix it must hold the partial thinking text.
assertContains(
result.thinkingContent,
'开始深入分析这个问题',
'thinkingContent 应包含截断前的 thinking 内容,而不是空字符串',
);
assertEqual(result.remainder, '', 'remainder 应为空,不泄漏到正文');
});
// Unclosed block: the opening tag itself must not be part of thinkingContent.
test('thinking 未闭合thinkingContent 不含 <thinking> 开标签本身', () => {
const text = '<thinking>\n分析中……';
const result = splitLeadingThinkingBlocks(text);
if (result.thinkingContent.includes('<thinking>')) {
throw new Error('thinkingContent 不应包含 <thinking> 开标签');
}
});
// Only an opening tag with nothing after it: thinkingContent is the empty string.
test('空 thinking 块未闭合(<thinking> 后无内容thinkingContent 为空字符串', () => {
const text = '<thinking>';
const result = splitLeadingThinkingBlocks(text);
assertEqual(result.startedWithThinking, true, 'startedWithThinking');
assertEqual(result.complete, false, 'complete');
assertEqual(result.thinkingContent, '', 'thinkingContent 应为空字符串');
});
// A closed block followed by an unclosed one: both contents are present in thinkingContent.
test('多个完整 thinking 块后接未闭合块:合并所有内容', () => {
const text = '<thinking>第一段</thinking>\n<thinking>第二段截断中……';
const result = splitLeadingThinkingBlocks(text);
assertEqual(result.startedWithThinking, true, 'startedWithThinking');
assertEqual(result.complete, false, 'complete');
assertContains(result.thinkingContent, '第一段', '应包含第一段');
assertContains(result.thinkingContent, '第二段截断中', '应包含截断的第二段');
});
// No thinking tag at all: startedWithThinking=false and remainder is the input text.
test('无 thinking 标签startedWithThinking=falseremainder=原文', () => {
const text = '这是普通正文内容';
const result = splitLeadingThinkingBlocks(text);
assertEqual(result.startedWithThinking, false, 'startedWithThinking');
assertEqual(result.remainder, text, 'remainder 应为原文');
});
// ==================== closeUnclosedThinking tests ====================
// Cases for the locally inlined closeUnclosedThinking: text is returned as-is
// unless it has more opening than closing thinking tags.
console.log('\n📦 closeUnclosedThinking — 续写 assistantContext 补全标签\n');
// No thinking tags: returned unchanged.
test('无 thinking 标签:原文不变', () => {
const text = '这是正常的 assistant 上下文';
assertEqual(closeUnclosedThinking(text), text, '不含 thinking 标签时应原样返回');
});
// Already closed: returned unchanged.
test('thinking 已闭合:原文不变', () => {
const text = '<thinking>思考内容</thinking>\n正文内容';
assertEqual(closeUnclosedThinking(text), text, '已闭合时不应修改');
});
// Unclosed: a closing tag is appended.
test('thinking 未闭合:自动追加 </thinking>', () => {
const text = '<thinking>\n思考中然后被截断了……';
const result = closeUnclosedThinking(text);
assertContains(result, '</thinking>', '应补全 </thinking> 标签');
// After the repair the numbers of opening and closing tags must match.
const opens = (result.match(/<thinking>/g) || []).length;
const closes = (result.match(/<\/thinking>/g) || []).length;
assertEqual(opens, closes, '<thinking> 和 </thinking> 数量应相等');
});
test('assistantContext 截断后的典型场景:... + 未闭合 thinking 尾部', () => {
// Simulates fullResponse.slice(-2000) cutting into the middle of a thinking
// block, so the opening tag is outside the window.
const text = '...\n分析更多细节考虑到边界情况……';
// The window holds no <thinking> tag, so closeUnclosedThinking returns it unchanged.
assertEqual(closeUnclosedThinking(text), text, '无开标签时不应修改');
});
// One closed block followed by an unclosed one: the closing tag is appended.
test('assistantContext 包含完整 thinking 后接未闭合内容:补全标签', () => {
const text = '<thinking>第一段完整</thinking>\n<thinking>第二段截断中……';
const result = closeUnclosedThinking(text);
const opens = (result.match(/<thinking>/g) || []).length;
const closes = (result.match(/<\/thinking>/g) || []).length;
assertEqual(opens, closes, '补全后开闭标签数量应相等');
});
// ==================== Summary ====================
// Print the pass/fail counts and exit with status 1 if any case failed.
console.log(`\n结果:${passed} 通过,${failed} 失败\n`);
if (failed > 0) process.exit(1);