mirror of
https://github.com/7836246/cursor2api.git
synced 2026-05-18 04:26:17 +08:00
feat: 支持 Claude Code thinking 显示 (adaptive 模式)
- 修复 thinking 类型检测:支持 Claude Code 的 `adaptive` 类型(之前只检查 `enabled`)
- 区分 GUI/CLI thinking 策略:GUI(enabled)发 content block,CLI(adaptive)保留标签在正文中
- few-shot 示例包含 thinking:当 thinking 启用时,assistant 回复中演示 `<thinking>` 标签
- 优化短响应重试:仅在响应 <3 字符且不含数字时重试,避免 `2` `25岁` 等短回答被误判
- 验证发现 Claude Code signature 为密码学签名,无法伪造
This commit is contained in:
@@ -175,10 +175,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
|
||||
combinedSystem = combinedSystem.replace(/^x-anthropic-billing-header[^\n]*$/gim, '');
|
||||
combinedSystem = combinedSystem.replace(/\n{3,}/g, '\n\n').trim();
|
||||
}
|
||||
|
||||
// ★ Thinking 提示注入:当客户端请求 thinking 时,引导模型使用 <thinking> 标签
|
||||
if (req.thinking?.type === 'enabled') {
|
||||
const thinkingHint = '\n\nBefore responding, think through the problem step by step inside <thinking>...</thinking> tags. Your thinking will be extracted and returned separately. After thinking, provide your actual response outside the tags.';
|
||||
// ★ Thinking 提示注入:根据是否有工具选择不同的注入位置
|
||||
// 有工具时:放在工具指令末尾(不会被工具定义覆盖,模型更容易注意)
|
||||
// 无工具时:放在系统提示词末尾(原有行为,已验证有效)
|
||||
const thinkingEnabled = req.thinking?.type === 'enabled' || req.thinking?.type === 'adaptive';
|
||||
const thinkingHint = '\n\n**IMPORTANT**: Before your response, you MUST first think through the problem step by step inside <thinking>...</thinking> tags. Your thinking process will be extracted and shown separately. After the closing </thinking> tag, provide your actual response or actions.';
|
||||
if (thinkingEnabled && !hasTools) {
|
||||
combinedSystem = (combinedSystem || '') + thinkingHint;
|
||||
}
|
||||
|
||||
@@ -189,6 +191,11 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
|
||||
const hasCommunicationTool = tools.some(t => ['attempt_completion', 'ask_followup_question', 'AskFollowupQuestion'].includes(t.name));
|
||||
let toolInstructions = buildToolInstructions(tools, hasCommunicationTool, toolChoice);
|
||||
|
||||
// ★ 有工具时:thinking 提示放在工具指令末尾(模型注意力最强的位置之一)
|
||||
if (thinkingEnabled) {
|
||||
toolInstructions += thinkingHint;
|
||||
}
|
||||
|
||||
// 系统提示词与工具指令合并
|
||||
toolInstructions = combinedSystem + '\n\n---\n\n' + toolInstructions;
|
||||
|
||||
@@ -214,8 +221,14 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
|
||||
id: shortId(),
|
||||
role: 'user',
|
||||
});
|
||||
// ★ 当 thinking 启用时,few-shot 示例也包含 <thinking> 标签
|
||||
// few-shot 是让模型遵循输出格式最强力的手段
|
||||
const fewShotAction = `\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\``;
|
||||
const fewShotResponse = thinkingEnabled
|
||||
? `<thinking>\nThe user wants me to help with their project. I should start by examining the project structure to understand what we're working with.\n</thinking>\n\nLet me start by examining the project structure.\n\n${fewShotAction}`
|
||||
: `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n${fewShotAction}`;
|
||||
messages.push({
|
||||
parts: [{ type: 'text', text: `Understood. I'll use the structured format for actions. Here's how I'll respond:\n\n\`\`\`json action\n${JSON.stringify({ tool: fewShotTool.name, parameters: fewShotParams }, null, 2)}\n\`\`\`` }],
|
||||
parts: [{ type: 'text', text: fewShotResponse }],
|
||||
id: shortId(),
|
||||
role: 'assistant',
|
||||
});
|
||||
@@ -270,7 +283,12 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise<Cur
|
||||
|
||||
actualQuery = actualQuery.trim();
|
||||
|
||||
let wrapped = `${actualQuery}\n\nRespond with the appropriate action using the structured format.`;
|
||||
// ★ 判断是否是最后一条用户消息(模型即将回答的那条)
|
||||
const isLastUserMsg = !req.messages.slice(i + 1).some(m => m.role === 'user');
|
||||
const thinkingSuffix = (thinkingEnabled && isLastUserMsg)
|
||||
? '\n\nFirst, think step by step inside <thinking>...</thinking> tags. Then respond with the appropriate action using the structured format.'
|
||||
: '\n\nRespond with the appropriate action using the structured format.';
|
||||
let wrapped = `${actualQuery}${thinkingSuffix}`;
|
||||
|
||||
if (tagsPrefix) {
|
||||
text = `${tagsPrefix}\n${wrapped}`;
|
||||
|
||||
@@ -444,15 +444,23 @@ export async function handleMessages(req: Request, res: Response): Promise<void>
|
||||
// 转换为 Cursor 请求
|
||||
log.startPhase('convert', '格式转换');
|
||||
log.info('Handler', 'convert', '开始转换为 Cursor 请求格式');
|
||||
// ★ 区分客户端 thinking 模式:
|
||||
// - enabled: GUI 插件,支持渲染 thinking content block
|
||||
// - adaptive: Claude Code,需要密码学 signature 验证,无法伪造 → 保留标签在正文中
|
||||
const clientRequestedThinking = body.thinking?.type === 'enabled';
|
||||
// ★ Thinking 默认启用:Claude Code 等客户端可能不传 thinking 参数,proxy 层自动补上
|
||||
if (!body.thinking) {
|
||||
body.thinking = { type: 'enabled' };
|
||||
}
|
||||
const cursorReq = await convertToCursorRequest(body);
|
||||
log.endPhase();
|
||||
log.recordCursorRequest(cursorReq);
|
||||
log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}`);
|
||||
log.debug('Handler', 'convert', `转换完成: ${cursorReq.messages.length} messages, model=${cursorReq.model}, clientThinking=${clientRequestedThinking}, thinkingType=${body.thinking?.type}`);
|
||||
|
||||
if (body.stream) {
|
||||
await handleStream(res, cursorReq, body, log);
|
||||
await handleStream(res, cursorReq, body, log, clientRequestedThinking);
|
||||
} else {
|
||||
await handleNonStream(res, cursorReq, body, log);
|
||||
await handleNonStream(res, cursorReq, body, log, clientRequestedThinking);
|
||||
}
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
@@ -616,7 +624,7 @@ export function buildRetryRequest(body: AnthropicRequest, attempt: number): Anth
|
||||
|
||||
// ==================== 流式处理 ====================
|
||||
|
||||
async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
|
||||
async function handleStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
|
||||
// 设置 SSE headers
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
@@ -681,16 +689,21 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
|
||||
});
|
||||
|
||||
// ★ Thinking 提取(在拒绝检测之前,防止 thinking 内容触发 isRefusal 误判)
|
||||
const thinkingEnabled = body.thinking?.type === 'enabled';
|
||||
let thinkingContent = '';
|
||||
if (fullResponse.includes('<thinking>')) {
|
||||
const thinkingMatch = fullResponse.match(/<thinking>([\s\S]*?)<\/thinking>/g);
|
||||
if (thinkingMatch) {
|
||||
thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
|
||||
fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
|
||||
log.info('Handler', 'thinking', `剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
|
||||
log.recordThinking(thinkingContent);
|
||||
log.updateSummary({ thinkingChars: thinkingContent.length });
|
||||
if (clientRequestedThinking) {
|
||||
// 客户端原生请求 thinking → 剥离标签,稍后发送 thinking content block
|
||||
fullResponse = fullResponse.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
|
||||
log.info('Handler', 'thinking', `剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`);
|
||||
} else {
|
||||
// proxy 注入的 thinking → 保留标签在正文中,Claude Code 可直接显示
|
||||
log.info('Handler', 'thinking', `保留 thinking 在正文中 (非客户端请求): ${thinkingContent.length} chars`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -729,10 +742,11 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
|
||||
}
|
||||
}
|
||||
|
||||
// 极短响应重试(可能是连接中断)
|
||||
if (hasTools && fullResponse.trim().length < 10 && retryCount < MAX_REFUSAL_RETRIES) {
|
||||
// 极短响应重试(仅在响应几乎为空时触发,避免误判正常短回答如 "2" 或 "25岁")
|
||||
const trimmed = fullResponse.trim();
|
||||
if (hasTools && trimmed.length < 3 && !trimmed.match(/\d/) && retryCount < MAX_REFUSAL_RETRIES) {
|
||||
retryCount++;
|
||||
log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars),重试第${retryCount}次`);
|
||||
log.warn('Handler', 'retry', `响应过短 (${fullResponse.length} chars: "${trimmed}"),重试第${retryCount}次`);
|
||||
activeCursorReq = await convertToCursorRequest(body);
|
||||
await executeStream();
|
||||
log.info('Handler', 'retry', `重试响应: ${fullResponse.length} chars`, { preview: fullResponse.substring(0, 200) });
|
||||
@@ -835,9 +849,10 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
|
||||
log.warn('Handler', 'truncation', `${MAX_AUTO_CONTINUE}次续写后仍截断 (${fullResponse.length} chars) → stop_reason=max_tokens`);
|
||||
}
|
||||
|
||||
// ★ Thinking 块发送:在实际内容之前发送 thinking content block
|
||||
// ★ Thinking 块发送:仅 GUI 插件(enabled)才发 thinking content block
|
||||
// Claude Code(adaptive)需要密码学 signature 验证,无法伪造,所以保留标签在正文中
|
||||
log.startPhase('stream', 'SSE 输出');
|
||||
if (thinkingEnabled && thinkingContent) {
|
||||
if (clientRequestedThinking && thinkingContent) {
|
||||
writeSSE(res, 'content_block_start', {
|
||||
type: 'content_block_start', index: blockIndex,
|
||||
content_block: { type: 'thinking', thinking: '' },
|
||||
@@ -1038,7 +1053,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
|
||||
|
||||
// ==================== 非流式处理 ====================
|
||||
|
||||
async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger): Promise<void> {
|
||||
async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body: AnthropicRequest, log: RequestLogger, clientRequestedThinking: boolean = false): Promise<void> {
|
||||
// ★ 非流式保活:手动设置 chunked 响应,在缓冲期间每 15s 发送空白字符保活
|
||||
// JSON.parse 会忽略前导空白,所以客户端解析不受影响
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
@@ -1068,14 +1083,17 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
|
||||
});
|
||||
|
||||
// ★ Thinking 提取(在拒绝检测之前)
|
||||
const thinkingEnabled = body.thinking?.type === 'enabled';
|
||||
let thinkingContent = '';
|
||||
if (fullText.includes('<thinking>')) {
|
||||
const thinkingMatch = fullText.match(/<thinking>([\s\S]*?)<\/thinking>/g);
|
||||
if (thinkingMatch) {
|
||||
thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n');
|
||||
fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
|
||||
log.info('Handler', 'thinking', `非流式剥离 thinking: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
|
||||
if (clientRequestedThinking) {
|
||||
fullText = fullText.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, '').trim();
|
||||
log.info('Handler', 'thinking', `非流式剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`);
|
||||
} else {
|
||||
log.info('Handler', 'thinking', `非流式保留 thinking 在正文中: ${thinkingContent.length} chars`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1198,8 +1216,8 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
|
||||
|
||||
const contentBlocks: AnthropicContentBlock[] = [];
|
||||
|
||||
// ★ Thinking 内容作为第一个 content block
|
||||
if (thinkingEnabled && thinkingContent) {
|
||||
// ★ Thinking 内容作为第一个 content block(仅客户端原生请求时)
|
||||
if (clientRequestedThinking && thinkingContent) {
|
||||
contentBlocks.push({ type: 'thinking' as any, thinking: thinkingContent } as any);
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ export interface AnthropicRequest {
|
||||
temperature?: number;
|
||||
top_p?: number;
|
||||
stop_sequences?: string[];
|
||||
thinking?: { type: 'enabled' | 'disabled'; budget_tokens?: number };
|
||||
thinking?: { type: 'enabled' | 'disabled' | 'adaptive'; budget_tokens?: number };
|
||||
}
|
||||
|
||||
/** tool_choice 控制模型是否必须调用工具
|
||||
|
||||
Reference in New Issue
Block a user