From 18c90e072051c3afab5b137ac7af56d3fd52ea5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=B5=B7?= <7836246@qq.com> Date: Wed, 18 Mar 2026 09:10:08 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=BB=AD=E5=86=99=E6=AC=A1=E6=95=B0?= =?UTF-8?q?=E5=8F=AF=E9=85=8D=E7=BD=AE=20+=20=E5=8E=86=E5=8F=B2=E6=B6=88?= =?UTF-8?q?=E6=81=AF=E6=9D=A1=E6=95=B0=E7=A1=AC=E9=99=90=E5=88=B6=20(#61)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 采纳社区建议,新增两个配置项: 1. max_auto_continue(默认 3,设 0 禁用) - 替换了 handler.ts 中 4 处硬编码的 MAX_AUTO_CONTINUE = 3 - 设为 0 时 while 循环直接短路,完全禁用自动续写 - 用户可在对话中自行要求续写,往往更精准 2. max_history_messages(默认 100,-1 不限制) - 在 converter.ts 消息转换后、压缩步骤前执行裁剪 - 保留 few-shot 示例(工具模式前 2 条),删除最早的非 few-shot 消息 - 防止 800+ 条消息导致请求体积过大、响应变慢 两项配置均支持 config.yaml 和环境变量(MAX_AUTO_CONTINUE / MAX_HISTORY_MESSAGES) --- config.yaml.example | 13 +++++++++++++ docker-compose.yml | 4 ++++ src/config.ts | 6 ++++++ src/converter.ts | 13 +++++++++++++ src/handler.ts | 16 ++++++++-------- src/types.ts | 2 ++ 6 files changed, 46 insertions(+), 8 deletions(-) diff --git a/config.yaml.example b/config.yaml.example index 3af1e20..55eb7d0 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -27,6 +27,19 @@ timeout: 120 # Cursor 使用的模型 cursor_model: "anthropic/claude-sonnet-4.6" +# ==================== 自动续写配置 ==================== +# 当模型输出被截断时,自动发起续写请求的最大次数 +# 设为 0 可完全禁用自动续写(由用户在对话中手动续写) +# 环境变量: MAX_AUTO_CONTINUE=3 +max_auto_continue: 3 + +# ==================== 历史消息条数硬限制 ==================== +# 输入消息条数上限,超出时删除最早的消息(保留工具 few-shot 示例) +# 防止超长对话(800+ 条)导致请求体积过大、响应变慢 +# 设为 -1 不限制消息条数 +# 环境变量: MAX_HISTORY_MESSAGES=100 +max_history_messages: 100 + # ==================== Thinking 开关(最高优先级) ==================== # 控制是否向 Cursor 发送 thinking 请求,优先级高于客户端传入的 thinking 参数 # 设为 true: 强制启用 thinking(即使客户端没请求也注入) diff --git a/docker-compose.yml b/docker-compose.yml index c4c53f1..ccedc01 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,6 +36,10 @@ services: # - COMPRESSION_ENABLED=true # - COMPRESSION_LEVEL=2 + # ── 自动续写 & 历史消息限制 ── + # - MAX_AUTO_CONTINUE=3 # 截断后自动续写次数,0=禁用 + # - MAX_HISTORY_MESSAGES=100 # 历史消息条数上限,-1=不限制 + # ── 日志持久化 ── # - LOG_FILE_ENABLED=true # - LOG_DIR=./logs diff --git a/src/config.ts b/src/config.ts index d5cd388..56be4ed 100644 --- a/src/config.ts +++ b/src/config.ts @@ -12,6 +12,8 @@ export function getConfig(): AppConfig { port: 3010, timeout: 120, cursorModel: 'anthropic/claude-sonnet-4.6', + maxAutoContinue: 3, + maxHistoryMessages: 100, fingerprint: { userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36', }, @@ -26,6 +28,8 @@ export function getConfig(): AppConfig { if (yaml.timeout) config.timeout = yaml.timeout; if (yaml.proxy) config.proxy = yaml.proxy; if (yaml.cursor_model) config.cursorModel = yaml.cursor_model; + if (typeof yaml.max_auto_continue === 'number') config.maxAutoContinue = yaml.max_auto_continue; + if (typeof yaml.max_history_messages === 'number') config.maxHistoryMessages = yaml.max_history_messages; if (yaml.fingerprint) { if (yaml.fingerprint.user_agent) config.fingerprint.userAgent = yaml.fingerprint.user_agent; } @@ -90,6 +94,8 @@ export function getConfig(): AppConfig { if (process.env.TIMEOUT) config.timeout = parseInt(process.env.TIMEOUT); if (process.env.PROXY) config.proxy = process.env.PROXY; if (process.env.CURSOR_MODEL) config.cursorModel = process.env.CURSOR_MODEL; + if (process.env.MAX_AUTO_CONTINUE !== undefined) config.maxAutoContinue = parseInt(process.env.MAX_AUTO_CONTINUE); + if (process.env.MAX_HISTORY_MESSAGES !== undefined) config.maxHistoryMessages = parseInt(process.env.MAX_HISTORY_MESSAGES); if (process.env.AUTH_TOKEN) { config.authTokens = process.env.AUTH_TOKEN.split(',').map(s => s.trim()).filter(Boolean); } diff --git a/src/converter.ts b/src/converter.ts index a5bf047..5997839 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -399,6 +399,19 @@ export async function convertToCursorRequest(req: AnthropicRequest): Promise= 0) { + const fewShotOffset = hasTools ? 2 : 0; // 工具模式有2条 few-shot 消息需跳过 + const userMessages = messages.length - fewShotOffset; + if (userMessages > maxHistoryMessages) { + const toRemove = userMessages - maxHistoryMessages; + messages.splice(fewShotOffset, toRemove); + console.log(`[Converter] 历史消息裁剪: ${userMessages} → ${maxHistoryMessages} 条 (移除了最早的 ${toRemove} 条)`); + } + } + // ★ 渐进式历史压缩(智能压缩,不破坏结构) // 可通过 config.yaml 的 compression 配置控制开关和级别 // 策略:保留最近 KEEP_RECENT 条消息完整,对早期消息进行结构感知压缩 diff --git a/src/handler.ts b/src/handler.ts index 81025cd..ee071c3 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -693,12 +693,12 @@ export async function autoContinueCursorToolResponseStream( hasTools: boolean, ): Promise { let fullResponse = initialResponse; - const MAX_AUTO_CONTINUE = 3; + const MAX_AUTO_CONTINUE = getConfig().maxAutoContinue; let continueCount = 0; let consecutiveSmallAdds = 0; const originalMessages = [...cursorReq.messages]; - while (shouldAutoContinueTruncatedToolResponse(fullResponse, hasTools) && continueCount < MAX_AUTO_CONTINUE) { + while (MAX_AUTO_CONTINUE > 0 && shouldAutoContinueTruncatedToolResponse(fullResponse, hasTools) && continueCount < MAX_AUTO_CONTINUE) { continueCount++; const anchorLength = Math.min(300, fullResponse.length); @@ -760,12 +760,12 @@ export async function autoContinueCursorToolResponseFull( hasTools: boolean, ): Promise { let fullText = initialText; - const MAX_AUTO_CONTINUE = 3; + const MAX_AUTO_CONTINUE = getConfig().maxAutoContinue; let continueCount = 0; let consecutiveSmallAdds = 0; const originalMessages = [...cursorReq.messages]; - while (shouldAutoContinueTruncatedToolResponse(fullText, hasTools) && continueCount < MAX_AUTO_CONTINUE) { + while (MAX_AUTO_CONTINUE > 0 && shouldAutoContinueTruncatedToolResponse(fullText, hasTools) && continueCount < MAX_AUTO_CONTINUE) { continueCount++; const anchorLength = Math.min(300, fullText.length); @@ -1279,14 +1279,14 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A // 流完成后,处理完整响应 // ★ 内部截断续写:如果模型输出过长被截断(常见于写大文件),Proxy 内部分段续写,然后拼接成完整响应 // 这样可以确保工具调用(如 Write)不会横跨两次 API 响应而退化为纯文本 - const MAX_AUTO_CONTINUE = 3; + const MAX_AUTO_CONTINUE = getConfig().maxAutoContinue; let continueCount = 0; let consecutiveSmallAdds = 0; // 连续小增量计数 // 保存原始请求的消息快照(不含续写追加的消息) const originalMessages = [...activeCursorReq.messages]; - while (shouldAutoContinueTruncatedToolResponse(fullResponse, hasTools) && continueCount < MAX_AUTO_CONTINUE) { + while (MAX_AUTO_CONTINUE > 0 && shouldAutoContinueTruncatedToolResponse(fullResponse, hasTools) && continueCount < MAX_AUTO_CONTINUE) { continueCount++; const prevLength = fullResponse.length; log.warn('Handler', 'continuation', `内部检测到截断 (${fullResponse.length} chars),隐式续写 (第${continueCount}次)`); @@ -1671,12 +1671,12 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body // ★ 内部截断续写(与流式路径对齐) // Claude CLI 使用非流式模式时,写大文件最容易被截断 // 在 proxy 内部完成续写,确保工具调用参数完整 - const MAX_AUTO_CONTINUE = 3; + const MAX_AUTO_CONTINUE = getConfig().maxAutoContinue; let continueCount = 0; let consecutiveSmallAdds = 0; // 连续小增量计数 const originalMessages = [...activeCursorReq.messages]; - while (shouldAutoContinueTruncatedToolResponse(fullText, hasTools) && continueCount < MAX_AUTO_CONTINUE) { + while (MAX_AUTO_CONTINUE > 0 && shouldAutoContinueTruncatedToolResponse(fullText, hasTools) && continueCount < MAX_AUTO_CONTINUE) { continueCount++; const prevLength = fullText.length; log.warn('Handler', 'continuation', `非流式检测到截断 (${fullText.length} chars),隐式续写 (第${continueCount}次)`); diff --git a/src/types.ts b/src/types.ts index 018fa6f..758473d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -106,6 +106,8 @@ export interface AppConfig { proxy?: string; cursorModel: string; authTokens?: string[]; // API 鉴权 token 列表,为空则不鉴权 + maxAutoContinue: number; // 自动续写最大次数,默认 3,设 0 禁用 + maxHistoryMessages: number; // 历史消息条数硬限制,默认 100,-1 不限制 vision?: { enabled: boolean; mode: 'ocr' | 'api';