feat: 支持 Claude Code thinking 显示 (adaptive 模式)

- 修复 thinking 类型检测：支持 Claude Code 的 `adaptive` 类型（之前只检查 `enabled`）
- 区分 GUI/CLI thinking 策略：GUI(enabled)发 content block，CLI(adaptive)保留标签在正文中
- few-shot 示例包含 thinking：当 thinking 启用时，assistant 回复中演示 `<thinking>` 标签
- 优化短响应重试：仅在响应 <3 字符且不含数字时重试，避免 `2` `25岁` 等短回答被误判
- 验证发现 Claude Code signature 为密码学签名，无法伪造
2026-05-18 04:26:17 +08:00 · 2026-03-16 16:48:58 +08:00
parent a848706613
commit 3458de0ac8
3 changed files with 61 additions and 25 deletions
--- a/src/converter.ts
+++ b/src/converter.ts
@@ -175,10 +175,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
        combinedSystem = combinedSystem.replace(/^x-anthropic-billing-header[^\n]*$/gim, '');
        combinedSystem = combinedSystem.replace(/\n{3,}/g, '\n\n').trim();
    }
-
-    // ★ Thinking 提示注入：当客户端请求 thinking 时，引导模型使用 <thinking> 标签
-    if (req.thinking?.type === 'enabled') {
-        const thinkingHint = '\n\nBefore responding, think through the problem step by step inside <thinking>...</thinking> tags. Your thinking will be extracted and returned separately. After thinking, provide your actual response outside the tags.';
+    // ★ Thinking 提示注入：根据是否有工具选择不同的注入位置
+    // 有工具时：放在工具指令末尾（不会被工具定义覆盖，模型更容易注意）
+    // 无工具时：放在系统提示词末尾（原有行为，已验证有效）
+    const thinkingEnabled = req.thinking?.type === 'enabled' || req.thinking?.type === 'adaptive';
+    const thinkingHint = '\n\n**IMPORTANT**: Before your response, you MUST first think through the problem step by step inside <thinking>...</thinking> tags. Your thinking process will be extracted and shown separately. After the closing </thinking> tag, provide your actual response or actions.';
+    if (thinkingEnabled && !hasTools) {
        combinedSystem = (combinedSystem || '') + thinkingHint;
    }

@@ -189,6 +191,11 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
        const hasCommunicationTool = tools.some(t => ['attempt_completion', 'ask_followup_question', 'AskFollowupQuestion'].includes(t.name));
        let toolInstructions = buildToolInstructions(tools, hasCommunicationTool, toolChoice);

+        // ★ 有工具时：thinking 提示放在工具指令末尾（模型注意力最强的位置之一）
+        if (thinkingEnabled) {
+            toolInstructions += thinkingHint;
+        }
+
        // 系统提示词与工具指令合并
        toolInstructions = combinedSystem + '\n\n---\n\n' + toolInstructions;

@@ -214,8 +221,14 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
            id: shortId(),
            role: 'user',
        });
+        // ★ 当 thinking 启用时，few-shot 示例也包含 <thinking> 标签
+        // few-shot 是让模型遵循输出格式最强力的手段
+        const fewShotAction = `\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\``;
+        const fewShotResponse = thinkingEnabled
+            ? `<thinking>\nThe user wants me to help with their project. I should start by examining the project structure to understand what we're working with.\n</thinking>\n\nLet me start by examining the project structure.\n\n${fewShotAction}`
+            : `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n${fewShotAction}`;
        messages.push({
-            parts: [{ type: 'text', text: `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\`` }],
+            parts: [{ type: 'text', text: fewShotResponse }],
            id: shortId(),
            role: 'assistant',
        });
@@ -270,7 +283,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur

                actualQuery = actualQuery.trim();

-                let wrapped = `${actualQuery}\n\nRespond with the appropriate action using the structured format.`;
+                // ★ 判断是否是最后一条用户消息（模型即将回答的那条）
+                const isLastUserMsg = !req.messages.slice(i + 1).some(m => m.role === 'user');
+                const thinkingSuffix = (thinkingEnabled && isLastUserMsg)
+                    ? '\n\nFirst, think step by step inside <thinking>...</thinking> tags. Then respond with the appropriate action using the structured format.'
+                    : '\n\nRespond with the appropriate action using the structured format.';
+                let wrapped = `${actualQuery}${thinkingSuffix}`;

                if (tagsPrefix) {
                    text = `${tagsPrefix}\n${wrapped}`;
--- a/src/handler.ts
+++ b/src/handler.ts
@@ -444,15 +444,23 @@ export async function handleMessages(req: Request, res: Response): Promise<void>
        // 转换为 Cursor 请求
        log.startPhase('convert', '格式转换');
        log.info('Handler', 'convert', '开始转换为 Cursor 请求格式');
+        // ★ 区分客户端 thinking 模式：
+        // - enabled: GUI 插件，支持渲染 thinking content block
+        // - adaptive: Claude Code，需要密码学 signature 验证，无法伪造 → 保留标签在正文中
+        const clientRequestedThinking = body.thinking?.type === 'enabled';
+        // ★ Thinking 默认启用：Claude Code 等客户端可能不传 thinking 参数，proxy 层自动补上
+        if (!body.thinking) {
+            body.thinking = { type: 'enabled' };
+        }
        const cursorReq = await convertToCursorRequest(body);
        log.endPhase();
        log.recordCursorRequest(cursorReq);
-        log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}`);
+        log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}, clientThinking=${clientRequestedThinking}, thinkingType=${body.thinking?.type}`);

        if (body.stream) {
-            await handleStream(res, cursorReq, body, log);
+            await handleStream(res, cursorReq, body, log, clientRequestedThinking);
        } else {
-            await handleNonStream(res, cursorReq, body, log);
+            await handleNonStream(res, cursorReq, body, log, clientRequestedThinking);
        }
    } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
@@ -616,7 +624,7 @@ export function buildRetryRequest(body: AnthropicRequest, attempt: number): Anth

 // ==================== 流式处理 ====================

-async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
+async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
    // 设置 SSE headers
    res.writeHead(200, {
        'Content-Type': 'text/event-stream',
@@ -681,16 +689,21 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
        });

        // ★ Thinking 提取（在拒绝检测之前，防止 thinking 内容触发 isRefusal 误判）
-        const thinkingEnabled = body.thinking?.type === 'enabled';
        let thinkingContent = '';
        if (fullResponse.includes('<thinking>')) {
            const thinkingMatch = fullResponse.match(/<thinking>([\s\S]*?)<\/thinking>/g);
            if (thinkingMatch) {
                thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
-                fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
-                log.info('Handler', 'thinking', `剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
                log.recordThinking(thinkingContent);
                log.updateSummary({ thinkingChars: thinkingContent.length });
+                if (clientRequestedThinking) {
+                    // 客户端原生请求 thinking → 剥离标签，稍后发送 thinking content block
+                    fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
+                    log.info('Handler', 'thinking', `剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
+                } else {
+                    // proxy 注入的 thinking → 保留标签在正文中，Claude Code 可直接显示
+                    log.info('Handler', 'thinking', `保留 thinking 在正文中 (非客户端请求): ${thinkingContent.length} chars`);
+                }
            }
        }

@@ -729,10 +742,11 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
            }
        }

-        // 极短响应重试（可能是连接中断）
-        if (hasTools && fullResponse.trim().length < 10 && retryCount < MAX_REFUSAL_RETRIES) {
+        // 极短响应重试（仅在响应几乎为空时触发，避免误判正常短回答如 "2" 或 "25岁"）
+        const trimmed = fullResponse.trim();
+        if (hasTools && trimmed.length < 3 && !trimmed.match(/\d/) && retryCount < MAX_REFUSAL_RETRIES) {
            retryCount++;
-            log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars)，重试第${retryCount}次`);
+            log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars: "${trimmed}")，重试第${retryCount}次`);
            activeCursorReq = await convertToCursorRequest(body);
            await executeStream();
            log.info('Handler', 'retry', `重试响应: ${fullResponse.length} chars`, { preview: fullResponse.substring(0, 200) });
@@ -835,9 +849,10 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
            log.warn('Handler', 'truncation', `${MAX_AUTO_CONTINUE}次续写后仍截断 (${fullResponse.length} chars) → stop_reason=max_tokens`);
        }

-        // ★ Thinking 块发送：在实际内容之前发送 thinking content block
+        // ★ Thinking 块发送：仅 GUI 插件（enabled）才发 thinking content block
+        // Claude Code（adaptive）需要密码学 signature 验证，无法伪造，所以保留标签在正文中
        log.startPhase('stream', 'SSE 输出');
-        if (thinkingEnabled && thinkingContent) {
+        if (clientRequestedThinking && thinkingContent) {
            writeSSE(res, 'content_block_start', {
                type: 'content_block_start', index: blockIndex,
                content_block: { type: 'thinking', thinking: '' },
@@ -1038,7 +1053,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener

 // ==================== 非流式处理 ====================

-async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
+async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
    // ★ 非流式保活：手动设置 chunked 响应，在缓冲期间每 15s 发送空白字符保活
    // JSON.parse 会忽略前导空白，所以客户端解析不受影响
    res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -1068,14 +1083,17 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
    });

    // ★ Thinking 提取（在拒绝检测之前）
-    const thinkingEnabled = body.thinking?.type === 'enabled';
    let thinkingContent = '';
    if (fullText.includes('<thinking>')) {
        const thinkingMatch = fullText.match(/<thinking>([\s\S]*?)<\/thinking>/g);
        if (thinkingMatch) {
            thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
-            fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
-            log.info('Handler', 'thinking', `非流式剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
+            if (clientRequestedThinking) {
+                fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
+                log.info('Handler', 'thinking', `非流式剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
+            } else {
+                log.info('Handler', 'thinking', `非流式保留 thinking 在正文中: ${thinkingContent.length} chars`);
+            }
        }
    }

@@ -1198,8 +1216,8 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener

    const contentBlocks: AnthropicContentBlock[] = [];

-    // ★ Thinking 内容作为第一个 content block
-    if (thinkingEnabled && thinkingContent) {
+    // ★ Thinking 内容作为第一个 content block（仅客户端原生请求时）
+    if (clientRequestedThinking && thinkingContent) {
        contentBlocks.push({ type: 'thinking' as any, thinking: thinkingContent } as any);
    }

--- a/src/types.ts
+++ b/src/types.ts
@@ -11,7 +11,7 @@ export interface AnthropicRequest {
    temperature?: number;
    top_p?: number;
    stop_sequences?: string[];
-    thinking?: { type: 'enabled' | 'disabled'; budget_tokens?: number };
+    thinking?: { type: 'enabled' | 'disabled' | 'adaptive'; budget_tokens?: number };
 }

 /** tool_choice 控制模型是否必须调用工具