feat: 支持 Claude Code thinking 显示 (adaptive 模式)

- 修复 thinking 类型检测:支持 Claude Code 的 `adaptive` 类型(之前只检查 `enabled`)
- 区分 GUI/CLI thinking 策略:GUI(enabled)发 content block,CLI(adaptive)保留标签在正文中
- few-shot 示例包含 thinking:当 thinking 启用时,assistant 回复中演示 `<thinking>` 标签
- 优化短响应重试:仅在响应 <3 字符且不含数字时重试,避免 `2` `25岁` 等短回答被误判
- 验证发现 Claude Code signature 为密码学签名,无法伪造
This commit is contained in:
小海
2026-03-16 16:48:58 +08:00
parent a848706613
commit 3458de0ac8
3 changed files with 61 additions and 25 deletions

View File

@@ -175,10 +175,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
combinedSystem = combinedSystem.replace(/^x-anthropic-billing-header[^\n]*$/gim, '');
combinedSystem = combinedSystem.replace(/\n{3,}/g, '\n\n').trim();
}
// ★ Thinking 提示注入:当客户端请求 thinking 时,引导模型使用 <thinking> 标签
if (req.thinking?.type === 'enabled') {
const thinkingHint = '\n\nBefore responding, think through the problem step by step inside <thinking>...</thinking> tags. Your thinking will be extracted and returned separately. After thinking, provide your actual response outside the tags.';
// ★ Thinking 提示注入:根据是否有工具选择不同的注入位置
// 有工具时:放在工具指令末尾(不会被工具定义覆盖,模型更容易注意)
// 无工具时:放在系统提示词末尾(原有行为,已验证有效)
const thinkingEnabled = req.thinking?.type === 'enabled' || req.thinking?.type === 'adaptive';
const thinkingHint = '\n\n**IMPORTANT**: Before your response, you MUST first think through the problem step by step inside <thinking>...</thinking> tags. Your thinking process will be extracted and shown separately. After the closing </thinking> tag, provide your actual response or actions.';
if (thinkingEnabled && !hasTools) {
combinedSystem = (combinedSystem || '') + thinkingHint;
}
@@ -189,6 +191,11 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
const hasCommunicationTool = tools.some(t => ['attempt_completion', 'ask_followup_question', 'AskFollowupQuestion'].includes(t.name));
let toolInstructions = buildToolInstructions(tools, hasCommunicationTool, toolChoice);
// ★ 有工具时:thinking 提示放在工具指令末尾(模型注意力最强的位置之一)
if (thinkingEnabled) {
toolInstructions += thinkingHint;
}
// 系统提示词与工具指令合并
toolInstructions = combinedSystem + '\n\n---\n\n' + toolInstructions;
@@ -214,8 +221,14 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
id: shortId(),
role: 'user',
});
// ★ 当 thinking 启用时,few-shot 示例也包含 <thinking> 标签
// few-shot 是让模型遵循输出格式最强力的手段
const fewShotAction = `\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\``;
const fewShotResponse = thinkingEnabled
? `<thinking>\nThe user wants me to help with their project. I should start by examining the project structure to understand what we're working with.\n</thinking>\n\nLet me start by examining the project structure.\n\n${fewShotAction}`
: `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n${fewShotAction}`;
messages.push({
parts: [{ type: 'text', text: `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\`` }],
parts: [{ type: 'text', text: fewShotResponse }],
id: shortId(),
role: 'assistant',
});
@@ -270,7 +283,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
actualQuery = actualQuery.trim();
let wrapped = `${actualQuery}\n\nRespond with the appropriate action using the structured format.`;
// ★ 判断是否是最后一条用户消息(模型即将回答的那条)
const isLastUserMsg = !req.messages.slice(i + 1).some(m => m.role === 'user');
const thinkingSuffix = (thinkingEnabled && isLastUserMsg)
? '\n\nFirst, think step by step inside <thinking>...</thinking> tags. Then respond with the appropriate action using the structured format.'
: '\n\nRespond with the appropriate action using the structured format.';
let wrapped = `${actualQuery}${thinkingSuffix}`;
if (tagsPrefix) {
text = `${tagsPrefix}\n${wrapped}`;

View File

@@ -444,15 +444,23 @@ export async function handleMessages(req: Request, res: Response): Promise<void>
// 转换为 Cursor 请求
log.startPhase('convert', '格式转换');
log.info('Handler', 'convert', '开始转换为 Cursor 请求格式');
// ★ 区分客户端 thinking 模式:
// - enabled: GUI 插件,支持渲染 thinking content block
// - adaptive: Claude Code,需要密码学 signature 验证,无法伪造 → 保留标签在正文中
const clientRequestedThinking = body.thinking?.type === 'enabled';
// ★ Thinking 默认启用:Claude Code 等客户端可能不传 thinking 参数,proxy 层自动补上
if (!body.thinking) {
body.thinking = { type: 'enabled' };
}
const cursorReq = await convertToCursorRequest(body);
log.endPhase();
log.recordCursorRequest(cursorReq);
log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}`);
log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}, clientThinking=${clientRequestedThinking}, thinkingType=${body.thinking?.type}`);
if (body.stream) {
await handleStream(res, cursorReq, body, log);
await handleStream(res, cursorReq, body, log, clientRequestedThinking);
} else {
await handleNonStream(res, cursorReq, body, log);
await handleNonStream(res, cursorReq, body, log, clientRequestedThinking);
}
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
@@ -616,7 +624,7 @@ export function buildRetryRequest(body: AnthropicRequest, attempt: number): Anth
// ==================== 流式处理 ====================
async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
// 设置 SSE headers
res.writeHead(200, {
'Content-Type': 'text/event-stream',
@@ -681,16 +689,21 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
});
// ★ Thinking 提取(在拒绝检测之前,防止 thinking 内容触发 isRefusal 误判)
const thinkingEnabled = body.thinking?.type === 'enabled';
let thinkingContent = '';
if (fullResponse.includes('<thinking>')) {
const thinkingMatch = fullResponse.match(/<thinking>([\s\S]*?)<\/thinking>/g);
if (thinkingMatch) {
thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
log.info('Handler', 'thinking', `剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
log.recordThinking(thinkingContent);
log.updateSummary({ thinkingChars: thinkingContent.length });
if (clientRequestedThinking) {
// 客户端原生请求 thinking → 剥离标签,稍后发送 thinking content block
fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
log.info('Handler', 'thinking', `剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
} else {
// proxy 注入的 thinking → 保留标签在正文中,Claude Code 可直接显示
log.info('Handler', 'thinking', `保留 thinking 在正文中 (非客户端请求): ${thinkingContent.length} chars`);
}
}
}
@@ -729,10 +742,11 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
}
}
// 极短响应重试(可能是连接中断)
if (hasTools && fullResponse.trim().length < 10 && retryCount < MAX_REFUSAL_RETRIES) {
// 极短响应重试(仅在响应几乎为空时触发,避免误判正常短回答如 "2" 或 "25岁")
const trimmed = fullResponse.trim();
if (hasTools && trimmed.length < 3 && !trimmed.match(/\d/) && retryCount < MAX_REFUSAL_RETRIES) {
retryCount++;
log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars),重试第${retryCount}`);
log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars: "${trimmed}"),重试第${retryCount}`);
activeCursorReq = await convertToCursorRequest(body);
await executeStream();
log.info('Handler', 'retry', `重试响应: ${fullResponse.length} chars`, { preview: fullResponse.substring(0, 200) });
@@ -835,9 +849,10 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
log.warn('Handler', 'truncation', `${MAX_AUTO_CONTINUE}次续写后仍截断 (${fullResponse.length} chars) → stop_reason=max_tokens`);
}
// ★ Thinking 块发送:在实际内容之前发送 thinking content block
// ★ Thinking 块发送:仅 GUI 插件(enabled)才发 thinking content block
// Claude Code(adaptive)需要密码学 signature 验证,无法伪造,所以保留标签在正文中
log.startPhase('stream', 'SSE 输出');
if (thinkingEnabled && thinkingContent) {
if (clientRequestedThinking && thinkingContent) {
writeSSE(res, 'content_block_start', {
type: 'content_block_start', index: blockIndex,
content_block: { type: 'thinking', thinking: '' },
@@ -1038,7 +1053,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
// ==================== 非流式处理 ====================
async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
// ★ 非流式保活:手动设置 chunked 响应,在缓冲期间每 15s 发送空白字符保活
// JSON.parse 会忽略前导空白,所以客户端解析不受影响
res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -1068,14 +1083,17 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
});
// ★ Thinking 提取(在拒绝检测之前)
const thinkingEnabled = body.thinking?.type === 'enabled';
let thinkingContent = '';
if (fullText.includes('<thinking>')) {
const thinkingMatch = fullText.match(/<thinking>([\s\S]*?)<\/thinking>/g);
if (thinkingMatch) {
thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
log.info('Handler', 'thinking', `非流式剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
if (clientRequestedThinking) {
fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
log.info('Handler', 'thinking', `非流式剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
} else {
log.info('Handler', 'thinking', `非流式保留 thinking 在正文中: ${thinkingContent.length} chars`);
}
}
}
@@ -1198,8 +1216,8 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
const contentBlocks: AnthropicContentBlock[] = [];
// ★ Thinking 内容作为第一个 content block
if (thinkingEnabled && thinkingContent) {
// ★ Thinking 内容作为第一个 content block(仅客户端原生请求时)
if (clientRequestedThinking && thinkingContent) {
contentBlocks.push({ type: 'thinking' as any, thinking: thinkingContent } as any);
}

View File

@@ -11,7 +11,7 @@ export interface AnthropicRequest {
temperature?: number;
top_p?: number;
stop_sequences?: string[];
thinking?: { type: 'enabled' | 'disabled'; budget_tokens?: number };
thinking?: { type: 'enabled' | 'disabled' | 'adaptive'; budget_tokens?: number };
}
/** tool_choice 控制模型是否必须调用工具