diff --git a/src/handler.ts b/src/handler.ts index a0a8c4e..5abc184 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -140,6 +140,38 @@ export function isRefusal(text: string): boolean { return REFUSAL_PATTERNS.some(p => p.test(text)); } +// ==================== Thinking 提取 ==================== + +const THINKING_OPEN = ''; +const THINKING_CLOSE = ''; + +/** + * 安全提取 thinking 内容并返回剥离后的正文。 + * + * ★ 使用 indexOf + lastIndexOf 而非非贪婪正则 [\s\S]*? + * 防止 thinking 内容本身包含 字面量时提前截断, + * 导致 thinking 后半段 + 闭合标签泄漏到正文。 + */ +export function extractThinking(text: string): { thinkingContent: string; strippedText: string } { + const startIdx = text.indexOf(THINKING_OPEN); + if (startIdx === -1) return { thinkingContent: '', strippedText: text }; + + const contentStart = startIdx + THINKING_OPEN.length; + const endIdx = text.lastIndexOf(THINKING_CLOSE); + + if (endIdx > startIdx) { + return { + thinkingContent: text.slice(contentStart, endIdx).trim(), + strippedText: (text.slice(0, startIdx) + text.slice(endIdx + THINKING_CLOSE.length)).trim(), + }; + } + // 未闭合(流式截断)→ thinking 取到末尾,正文为开头部分 + return { + thinkingContent: text.slice(contentStart).trim(), + strippedText: text.slice(0, startIdx).trim(), + }; +} + // ==================== 模型列表 ==================== export function listModels(_req: Request, res: Response): void { @@ -686,6 +718,16 @@ async function handleDirectTextStream( clientRequestedThinking: boolean, streamState: { blockIndex: number; textBlockStarted: boolean; thinkingEmitted: boolean }, ): Promise { + // ★ 流式保活:增量流式路径也需要 keepalive,防止 thinking 缓冲期间网关 504 + const keepaliveInterval = setInterval(() => { + try { + res.write(': keepalive\n\n'); + // @ts-expect-error flush exists on ServerResponse when compression is used + if (typeof res.flush === 'function') res.flush(); + } catch { /* connection already closed, ignore */ } + }, 15000); + + try { let activeCursorReq = cursorReq; let retryCount = 0; let finalRawResponse = ''; @@ -816,9 +858,9 @@ async function handleDirectTextStream( }); if (!finalThinkingContent && finalRawResponse.includes('')) { - const thinkingMatch = finalRawResponse.match(/([\s\S]*?)<\/thinking>/g); - if (thinkingMatch) { - finalThinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n'); + const { thinkingContent: extracted } = extractThinking(finalRawResponse); + if (extracted) { + finalThinkingContent = extracted; } } @@ -875,6 +917,9 @@ async function handleDirectTextStream( log.complete(finalRecordedResponse.length, 'end_turn'); res.end(); + } finally { + clearInterval(keepaliveInterval); + } } // ==================== 流式处理 ==================== @@ -902,14 +947,9 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A }, }); - // ★ 流式保活:在缓冲/续写期间每 15s 发送 SSE 注释,防止网关判定连接空闲超时 (504) - const keepaliveInterval = setInterval(() => { - try { - res.write(': keepalive\n\n'); - // @ts-expect-error flush exists on ServerResponse when compression is used - if (typeof res.flush === 'function') res.flush(); - } catch { /* connection already closed, ignore */ } - }, 15000); + // ★ 流式保活 — 注意:无工具的增量流式路径(handleDirectTextStream)有自己的 keepalive + // 这里的 keepalive 仅用于工具模式下的缓冲/续写期间 + let keepaliveInterval: ReturnType | undefined; let fullResponse = ''; let sentText = ''; @@ -945,6 +985,15 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A return; } + // 工具模式:创建 keepalive(无工具路径已在 handleDirectTextStream 内部处理) + keepaliveInterval = setInterval(() => { + try { + res.write(': keepalive\n\n'); + // @ts-expect-error flush exists on ServerResponse when compression is used + if (typeof res.flush === 'function') res.flush(); + } catch { /* connection already closed, ignore */ } + }, 15000); + await executeStream(); log.recordRawResponse(fullResponse); @@ -956,14 +1005,14 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A // ★ Thinking 提取(在拒绝检测之前,防止 thinking 内容触发 isRefusal 误判) let thinkingContent = ''; if (fullResponse.includes('')) { - const thinkingMatch = fullResponse.match(/([\s\S]*?)<\/thinking>/g); - if (thinkingMatch) { - thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n'); + const { thinkingContent: extracted, strippedText } = extractThinking(fullResponse); + if (extracted) { + thinkingContent = extracted; log.recordThinking(thinkingContent); log.updateSummary({ thinkingChars: thinkingContent.length }); if (clientRequestedThinking) { // 客户端原生请求 thinking → 剥离标签,稍后发送 thinking content block - fullResponse = fullResponse.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + fullResponse = strippedText; log.info('Handler', 'thinking', `剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullResponse.length} chars`); } else { // proxy 注入的 thinking → 保留标签在正文中,Claude Code 可直接显示 @@ -977,7 +1026,7 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A // 否则 thinking 中的反思性语言(如 "haven't given a specific task")会触发误判 const getTextForRefusalCheck = () => { if (fullResponse.includes('')) { - return fullResponse.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + return extractThinking(fullResponse).strippedText; } return fullResponse; }; @@ -1360,11 +1409,11 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body // ★ Thinking 提取(在拒绝检测之前) let thinkingContent = ''; if (fullText.includes('')) { - const thinkingMatch = fullText.match(/([\s\S]*?)<\/thinking>/g); - if (thinkingMatch) { - thinkingContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n'); + const { thinkingContent: extracted, strippedText } = extractThinking(fullText); + if (extracted) { + thinkingContent = extracted; if (clientRequestedThinking) { - fullText = fullText.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + fullText = strippedText; log.info('Handler', 'thinking', `非流式剥离 thinking → content block: ${thinkingContent.length} chars, 剩余 ${fullText.length} chars`); } else { log.info('Handler', 'thinking', `非流式保留 thinking 在正文中: ${thinkingContent.length} chars`); @@ -1376,7 +1425,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body // ★ 关键:拒绝检测必须在 thinking-stripped 文本上进行 const getTextForRefusalCheck = () => { if (fullText.includes('')) { - return fullText.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + return extractThinking(fullText).strippedText; } return fullText; }; diff --git a/src/openai-handler.ts b/src/openai-handler.ts index efcd4e0..6bc65e2 100644 --- a/src/openai-handler.ts +++ b/src/openai-handler.ts @@ -35,6 +35,7 @@ import { isIdentityProbe, isToolCapabilityQuestion, buildRetryRequest, + extractThinking, CLAUDE_IDENTITY_RESPONSE, CLAUDE_TOOLS_RESPONSE, MAX_REFUSAL_RETRIES, @@ -615,12 +616,12 @@ async function handleOpenAIStream( const thinkingEnabled = anthropicReq.thinking?.type === 'enabled'; let reasoningContent: string | undefined; if (fullResponse.includes('')) { - const thinkingMatch = fullResponse.match(/([\s\S]*?)<\/thinking>/g); - if (thinkingMatch) { + const { thinkingContent: extracted, strippedText } = extractThinking(fullResponse); + if (extracted) { if (thinkingEnabled) { - reasoningContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n'); + reasoningContent = extracted; } - fullResponse = stripThinkingTags(fullResponse); + fullResponse = strippedText; // thinking 剥离记录在详细日志中 } } @@ -819,14 +820,13 @@ async function handleOpenAINonStream( const thinkingEnabled = anthropicReq.thinking?.type === 'enabled'; let reasoningContent: string | undefined; if (fullText.includes('')) { - const thinkingMatch = fullText.match(/([\s\S]*?)<\/thinking>/g); - if (thinkingMatch) { + const { thinkingContent: extracted, strippedText } = extractThinking(fullText); + if (extracted) { if (thinkingEnabled) { - reasoningContent = thinkingMatch.map(m => m.replace(/<\/?thinking>/g, '').trim()).join('\n\n'); + reasoningContent = extracted; } - const stripped = fullText.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); // thinking 剥离记录 - fullText = stripped; + fullText = strippedText; } } @@ -841,7 +841,7 @@ async function handleOpenAINonStream( fullText = await sendCursorRequestFull(retryCursorReq); // 重试响应也需要先剥离 thinking if (fullText.includes('')) { - fullText = fullText.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + fullText = extractThinking(fullText).strippedText; } if (!shouldRetry()) break; } diff --git a/src/streaming-text.ts b/src/streaming-text.ts index 683adc2..b9da55b 100644 --- a/src/streaming-text.ts +++ b/src/streaming-text.ts @@ -37,10 +37,19 @@ const STREAM_START_BOUNDARY_RE = /[\n。!?.!?]/; /** * 剥离完整的 thinking 标签,返回可用于拒绝检测或最终文本处理的正文。 + * + * ★ 使用 indexOf + lastIndexOf 而非非贪婪正则,防止 thinking 内容本身 + * 包含 字面量时提前截断导致标签泄漏到正文。 */ export function stripThinkingTags(text: string): string { - if (!text) return text; - return text.replace(/[\s\S]*?<\/thinking>\s*/g, '').trim(); + if (!text || !text.includes(THINKING_OPEN)) return text; + const startIdx = text.indexOf(THINKING_OPEN); + const endIdx = text.lastIndexOf(THINKING_CLOSE); + if (endIdx > startIdx) { + return (text.slice(0, startIdx) + text.slice(endIdx + THINKING_CLOSE.length)).trim(); + } + // 未闭合(流式截断)→ 剥离从 开始的全部内容 + return text.slice(0, startIdx).trim(); } /**