diff --git a/Dockerfile b/Dockerfile index dfddf1a..add933d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,6 +21,9 @@ WORKDIR /app # 设置为生产环境 ENV NODE_ENV=production +# 增大 Node.js 堆内存上限,防止日志文件过大时加载 OOM(tesseract.js / js-tiktoken 初始化也有一定内存需求) +ENV NODE_OPTIONS="--max-old-space-size=4096" + # 出于安全考虑,避免使用 root 用户运行服务 RUN addgroup --system --gid 1001 nodejs && \ adduser --system --uid 1001 cursor diff --git a/README.md b/README.md index c88fb25..a125b20 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ cp config.yaml.example config.yaml | `logging.persist_mode` | 日志落盘模式:`summary` 问答摘要 / `compact` 精简 / `full` 完整 | `summary` | | `max_auto_continue` | 截断自动续写次数 (`0`=禁用,交由客户端续写) | `0` | | `max_history_messages` | 历史消息条数上限,超出时删除最早消息(建议改用 `max_history_tokens`) | `-1`(不限制) | -| `max_history_tokens` | 历史消息 token 数上限(推荐),有助于减少超出 Cursor 上下文的概率;注意 tiktoken 低估约 10~20%,建议参考实际 UI 日志调整,参考值 `120000~140000` | `130000` | +| `max_history_tokens` | 历史消息 token 数上限(推荐),代码自动补偿 Cursor 后端开销(1,300 基础 + 工具 tokenizer 差异),参考值 `130000~170000` | `150000` | | `sanitize_response` | 响应内容清洗开关(替换 Cursor 身份引用为 Claude) | `false` | | `refusal_patterns` | 自定义拒绝检测规则列表(追加到内置规则) | 不配置 | | `tools.passthrough` | 🆕 透传模式:跳过 few-shot 注入,原始 JSON 嵌入(Roo Code/Cline 推荐) | `false` | @@ -243,6 +243,8 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use` +> ⚠️ **环境变量优先级高于 `config.yaml`**:若在 docker-compose 等环境中设置了环境变量,该参数的 `config.yaml` 配置会被覆盖,热重载对其**无效**。需要通过 `config.yaml` 动态调整的参数,请勿同时在环境变量中设置。 + | 环境变量 | 说明 | |----------|------| | `PORT` | 服务端口 | | `AUTH_TOKEN` | API 鉴权 token(逗号分隔多个) | | `PROXY` | 全局代理地址 | @@ -254,7 +256,7 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use` | `LOG_DIR` | 日志文件目录 | | `MAX_AUTO_CONTINUE` | 截断自动续写次数 (`0`=禁用) | | `MAX_HISTORY_MESSAGES` | 历史消息条数上限(`-1`=不限制) | -| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `130000`,`-1`=不限制,参考值 `120000~140000`,tiktoken 低估约 10~20%) | +| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `150000`,`-1`=不限制,参考值 `130000~170000`,代码自动补偿 Cursor 后端开销) | | `SANITIZE_RESPONSE` | 响应内容清洗开关 
(`true`/`false`,默认 `false`) | | `TOOLS_PASSTHROUGH` | 🆕 工具透传模式 (`true`/`false`,默认 `false`) | | `TOOLS_DISABLED` | 🆕 工具禁用模式 (`true`/`false`,默认 `false`) | diff --git a/config.yaml.example b/config.yaml.example index 6328c4f..a5fb251 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -1,5 +1,10 @@ # Cursor2API v2 配置文件 # 复制此文件为 config.yaml 并根据需要修改 +# +# ⚠️ 环境变量优先级高于此文件: +# 若通过环境变量(如 docker-compose 的 environment 块)设置了某个参数, +# 则修改此文件对该参数无效,热重载也不会生效。 +# 需要在 config.yaml 中管理的参数,请勿同时在环境变量中设置。 # 服务端口 port: 3010 @@ -46,25 +51,27 @@ max_history_messages: -1 # 按 js-tiktoken (cl100k_base) 估算 token 数裁剪历史,比按条数更精准 # 能有效防止超出 Cursor API 200k 上下文上限,保障模型输出稳定 # -# ⚠️ 注意:js-tiktoken 使用 OpenAI cl100k_base 词表估算,与 Claude 实际 tokenizer 有差异 -# 实测低估约 10%~20%,中英混合/工具调用场景差异更大 -# 建议开启后观察 UI 日志中的「↑ Cursor 输入 tokens」真实值,再据此调整 +# 说明:此值仅计算我们发送的消息内容 token +# 代码会自动额外补偿 Cursor 后端开销(动态计算): +# - 基础隐藏系统提示:约 1,300 tokens(固定) +# - 工具 tokenizer 差异:compact ~20/工具,full ~240/工具,names_only ~5/工具 +# 输出空间不在此预留,由用户自行通过此值控制(建议留 16,000~32,000 余量) # # 裁剪规则: -# - 系统提示 + 工具定义的 token 优先扣除 +# - 系统提示 + 工具定义的 token 优先扣除(含上述固定开销) # - 剩余额度从最新消息往前累加,超出预算的最早消息整条删除 # - 工具模式的 few-shot 示例(前 2 条)始终保留 # -# 参考值:120000~140000(考虑到估算误差,需预留足够安全余量) -# Cursor API 上下文上限约 200k tokens,实际可用历史额度受系统提示和工具定义影响 +# 参考值:130000~170000,默认 150000 +# Cursor API 上下文上限约 200k tokens,建议 max_history_tokens + 开销 + 预留输出 ≤ 200000 # # 与 max_history_messages 的关系: # 两者独立生效,若同时设置则取更严格的结果 # 推荐:只设置 max_history_tokens,不设置 max_history_messages # # 设为 -1 不限制 -# 环境变量: MAX_HISTORY_TOKENS=130000 -max_history_tokens: 130000 +# 环境变量: MAX_HISTORY_TOKENS=150000 +max_history_tokens: 150000 # ==================== Thinking 开关(最高优先级) ==================== # 控制是否向 Cursor 发送 thinking 请求,优先级高于客户端传入的 thinking 参数 diff --git a/docker-compose.yml b/docker-compose.yml index 4792af2..35ffeb7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -39,7 +39,7 @@ services: # ── 自动续写 & 历史消息限制 ── # - MAX_AUTO_CONTINUE=0 # 截断后自动续写次数,0=禁用(默认) # - MAX_HISTORY_MESSAGES=-1 # 
历史消息条数上限,-1=不限制(建议改用 MAX_HISTORY_TOKENS) - # - MAX_HISTORY_TOKENS=130000 # 历史消息 token 数上限(推荐),默认 130000,参考值 120000~140000(tiktoken 低估约 10~20%,建议观察 UI 日志实际值后调整) + # - MAX_HISTORY_TOKENS=150000 # 历史消息 token 数上限(推荐),默认 150000,参考值 130000~170000(代码自动补偿 Cursor 后端开销) # ── 日志持久化 ── # - LOG_FILE_ENABLED=true diff --git a/src/config.ts b/src/config.ts index 1cffc10..7b34a54 100644 --- a/src/config.ts +++ b/src/config.ts @@ -195,7 +195,7 @@ function defaultConfig(): AppConfig { cursorModel: 'anthropic/claude-sonnet-4.6', maxAutoContinue: 0, maxHistoryMessages: -1, - maxHistoryTokens: 130000, + maxHistoryTokens: 150000, sanitizeEnabled: false, // 默认关闭响应内容清洗 fingerprint: { userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36', diff --git a/src/converter.ts b/src/converter.ts index c18196f..77c4669 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -682,17 +682,21 @@ I will ALWAYS use this exact \`\`\`json action\`\`\` block format for tool calls if (maxHistoryTokens >= 0) { const fewShotOffset2 = hasTools ? 2 : 0; - // 估算系统提示 token 数 + // 直接对已构建的 few-shot 消息(系统提示+工具定义+few-shot回复)调用 estimateTokens + // 比 tools.length*70+350 更准确,因为实际注入文字已经在 messages[0..fewShotOffset2-1] 中 let overhead = 0; - if (req.system) { - const sysStr = typeof req.system === 'string' ? req.system : JSON.stringify(req.system); - overhead += estimateTokens(sysStr); - } - // 估算工具定义 token 数(压缩后约 70 tokens/工具 + 350 固定开销) - if (req.tools && req.tools.length > 0) { - overhead += req.tools.length * 70; - overhead += 350; + for (let i = 0; i < fewShotOffset2; i++) { + overhead += estimateTokens(messages[i].parts.map(p => p.text ?? 
'').join('')); } + // Cursor 后端额外开销:基础隐藏系统提示(实测约 1300 tokens)+ 工具 tokenizer 差异 + // 注意:工具定义已通过 buildToolInstructions 转为文本注入 messages[0],并已在上方 estimateTokens 中计算 + // Cursor 后端对工具的额外 tokenizer 差异与 schema_mode 强相关: + // compact模式 ~20 tokens/工具,full模式 ~240 tokens/工具,names_only ~5 tokens/工具 + // 输出空间不在此预留,由用户通过 max_history_tokens 自行控制 + const toolCount = req.tools?.length ?? 0; + const schemaMode = getConfig().tools?.schemaMode ?? 'compact'; + const perToolOverhead = schemaMode === 'full' ? 240 : (schemaMode === 'names_only' ? 5 : 20); + overhead += 1300 + toolCount * perToolOverhead; const historyBudget = Math.max(0, maxHistoryTokens - overhead); diff --git a/src/cursor-client.ts b/src/cursor-client.ts index e98d866..affdf73 100644 --- a/src/cursor-client.ts +++ b/src/cursor-client.ts @@ -247,14 +247,18 @@ async function sendCursorRequestInner( } /** - * 发送非流式请求,收集完整响应 + * 发送非流式请求,收集完整响应及 usage 信息 */ -export async function sendCursorRequestFull(req: CursorChatRequest): Promise { +export async function sendCursorRequestFull(req: CursorChatRequest): Promise<{ text: string; usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number } }> { let fullText = ''; + let usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined; await sendCursorRequest(req, (event) => { if (event.type === 'text-delta' && event.delta) { fullText += event.delta; } + if (event.messageMetadata?.usage) { + usage = event.messageMetadata.usage; + } }); - return fullText; + return { text: fullText, usage }; } diff --git a/src/handler.ts b/src/handler.ts index da4562d..f3e4c21 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -97,6 +97,19 @@ export function listModels(_req: Request, res: Response): void { // ==================== Token 计数 ==================== +/** + * 对实际发往 Cursor 的完整消息内容做 token 估算(用于与 Cursor 返回值对比) + */ +export function estimateCursorReqTokens(cursorReq: CursorChatRequest): number { + let total = 0; + for (const msg of cursorReq.messages) 
{ + for (const part of msg.parts) { + total += estimateTokens(part.text ?? ''); + } + } + return total; +} + export function estimateInputTokens(body: AnthropicRequest): number { let total = 0; @@ -479,6 +492,8 @@ function toolCallNeedsMoreContinuation(toolCall: ParsedToolCall): boolean { */ export function shouldAutoContinueTruncatedToolResponse(text: string, hasTools: boolean): boolean { if (!hasTools || !isTruncated(text)) return false; + // 响应过短(< 200 chars)时不触发续写:上下文不足会导致模型拒绝或错误续写 + if (text.trim().length < 200) return false; if (!hasToolCalls(text)) return true; const { toolCalls } = parseToolCalls(text); @@ -677,7 +692,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener ], }; - const continuationResponse = await sendCursorRequestFull(continuationReq); + const { text: continuationResponse } = await sendCursorRequestFull(continuationReq); if (continuationResponse.trim().length === 0) break; const deduped = deduplicateContinuation(fullText, continuationResponse); @@ -1005,9 +1020,12 @@ async function handleDirectTextStream( ? sanitizeResponse(finalVisibleText) : finalTextToSend; log.recordFinalResponse(finalRecordedResponse); + const estimatedInput1 = estimateCursorReqTokens(activeCursorReq); + const actualInput1 = cursorUsage?.inputTokens; + console.log(`[TokenDiff] 流式(无工具) 估算(我们发的)=${estimatedInput1} Cursor实际=${actualInput1 ?? 'N/A'} Cursor隐藏开销=${actualInput1 != null ? (actualInput1 - estimatedInput1) : 'N/A'}`); log.updateSummary({ - inputTokens: cursorUsage?.inputTokens ?? estimateInputTokens(body), - outputTokens: cursorUsage?.outputTokens ?? 
estimateTokens(finalRecordedResponse), + inputTokens: cursorUsage?.inputTokens, + outputTokens: cursorUsage?.outputTokens, }); log.complete(finalRecordedResponse.length, 'end_turn'); @@ -1658,9 +1676,12 @@ Please go ahead and pick the most appropriate tool for the current task and outp // ★ 记录完成 log.recordFinalResponse(fullResponse); + const estimatedInput2 = estimateCursorReqTokens(activeCursorReq); + const actualInput2 = cursorUsage?.inputTokens; + console.log(`[TokenDiff] 流式(有工具) 估算(我们发的)=${estimatedInput2} Cursor实际=${actualInput2 ?? 'N/A'} Cursor隐藏开销=${actualInput2 != null ? (actualInput2 - estimatedInput2) : 'N/A'}`); log.updateSummary({ - inputTokens: cursorUsage?.inputTokens ?? estimateInputTokens(body), - outputTokens: cursorUsage?.outputTokens ?? estimateTokens(fullResponse), + inputTokens: cursorUsage?.inputTokens, + outputTokens: cursorUsage?.outputTokens, }); log.complete(fullResponse.length, stopReason); @@ -1695,7 +1716,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body try { log.startPhase('send', '发送到 Cursor (非流式)'); const apiStart = Date.now(); - let fullText = await sendCursorRequestFull(cursorReq); + let { text: fullText, usage: cursorUsage } = await sendCursorRequestFull(cursorReq); log.recordTTFT(); log.recordCursorApiTime(apiStart); log.recordRawResponse(fullText); @@ -1738,7 +1759,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body log.updateSummary({ retryCount }); const retryBody = buildRetryRequest(body, attempt); activeCursorReq = await convertToCursorRequest(retryBody); - fullText = await sendCursorRequestFull(activeCursorReq); + ({ text: fullText, usage: cursorUsage } = await sendCursorRequestFull(activeCursorReq)); // 重试后也需要剥离 thinking 标签 if (hasLeadingThinking(fullText)) { const { thinkingContent: retryThinking, strippedText: retryStripped } = extractThinking(fullText); @@ -1768,7 +1789,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, 
body retryCount++; log.warn('Handler', 'retry', `非流式响应过短 (${fullText.length} chars),重试第${retryCount}次`); activeCursorReq = await convertToCursorRequest(body); - fullText = await sendCursorRequestFull(activeCursorReq); + ({ text: fullText, usage: cursorUsage } = await sendCursorRequestFull(activeCursorReq)); log.info('Handler', 'retry', `非流式重试响应: ${fullText.length} chars`, { preview: fullText.substring(0, 200) }); } @@ -1813,7 +1834,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener ], }; - const continuationResponse = await sendCursorRequestFull(continuationReq); + const { text: continuationResponse } = await sendCursorRequestFull(continuationReq); if (continuationResponse.trim().length === 0) { log.warn('Handler', 'continuation', '非流式续写返回空响应,停止续写'); @@ -1919,7 +1940,7 @@ Please go ahead and pick the most appropriate tool for the current task and outp }, ]; activeCursorReq = { ...activeCursorReq, messages: forceMessages }; - fullText = await sendCursorRequestFull(activeCursorReq); + ({ text: fullText, usage: cursorUsage } = await sendCursorRequestFull(activeCursorReq)); ({ toolCalls, cleanText } = parseToolCalls(fullText)); } if (toolChoice?.type === 'any' && toolCalls.length === 0) { @@ -1983,7 +2004,10 @@ Please go ahead and pick the most appropriate tool for the current task and outp // ★ 记录完成 log.recordFinalResponse(fullText); - log.updateSummary({ inputTokens: estimateInputTokens(body), outputTokens: estimateTokens(fullText) }); + const estimatedInput = estimateCursorReqTokens(activeCursorReq); + const actualInput = cursorUsage?.inputTokens; + console.log(`[TokenDiff] 非流式 估算(我们发的)=${estimatedInput} Cursor实际=${actualInput ?? 'N/A'} Cursor隐藏开销=${actualInput != null ? 
(actualInput - estimatedInput) : 'N/A'}`); + log.updateSummary({ inputTokens: cursorUsage?.inputTokens, outputTokens: cursorUsage?.outputTokens }); log.complete(fullText.length, stopReason); } catch (err: unknown) { diff --git a/src/openai-handler.ts b/src/openai-handler.ts index a70ec51..5441d50 100644 --- a/src/openai-handler.ts +++ b/src/openai-handler.ts @@ -1134,7 +1134,7 @@ async function handleOpenAINonStream( log: RequestLogger, ): Promise { let activeCursorReq = cursorReq; - let fullText = await sendCursorRequestFull(activeCursorReq); + let fullText = (await sendCursorRequestFull(activeCursorReq)).text; const hasTools = (body.tools?.length ?? 0) > 0; // 日志记录在详细日志中 @@ -1162,7 +1162,7 @@ async function handleOpenAINonStream( const retryBody = buildRetryRequest(anthropicReq, attempt); const retryCursorReq = await convertToCursorRequest(retryBody); activeCursorReq = retryCursorReq; - fullText = await sendCursorRequestFull(activeCursorReq); + fullText = (await sendCursorRequestFull(activeCursorReq)).text; // 重试响应也需要先剥离 thinking if (hasLeadingThinking(fullText)) { fullText = extractThinking(fullText).strippedText; @@ -1775,7 +1775,7 @@ async function handleResponsesNonStream( log: RequestLogger, ): Promise { let activeCursorReq = cursorReq; - let fullText = await sendCursorRequestFull(activeCursorReq); + let fullText = (await sendCursorRequestFull(activeCursorReq)).text; const hasTools = (anthropicReq.tools?.length ?? 
0) > 0; // Thinking 提取 @@ -1790,7 +1790,7 @@ async function handleResponsesNonStream( const retryBody = buildRetryRequest(anthropicReq, attempt); const retryCursorReq = await convertToCursorRequest(retryBody); activeCursorReq = retryCursorReq; - fullText = await sendCursorRequestFull(activeCursorReq); + fullText = (await sendCursorRequestFull(activeCursorReq)).text; if (hasLeadingThinking(fullText)) { fullText = extractThinking(fullText).strippedText; } diff --git a/src/types.ts b/src/types.ts index 39f7030..77610e0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -116,7 +116,7 @@ export interface AppConfig { authTokens?: string[]; // API 鉴权 token 列表,为空则不鉴权 maxAutoContinue: number; // 自动续写最大次数,默认 3,设 0 禁用 maxHistoryMessages: number; // 历史消息条数硬限制,默认 -1(不限制) - maxHistoryTokens: number; // 历史消息 token 数上限(js-tiktoken 估算),默认 130000,-1 不限制 + maxHistoryTokens: number; // 历史消息 token 数上限(tiktoken 估算我们发出的内容,代码自动加 Cursor 后端开销:1300 基础 + perTool*工具数),默认 150000,-1 不限制 vision?: { enabled: boolean; mode: 'ocr' | 'api'; diff --git a/vue-ui/README.md b/vue-ui/README.md index 9a4c6cc..3e01d96 100644 --- a/vue-ui/README.md +++ b/vue-ui/README.md @@ -136,7 +136,7 @@ open http://localhost:3010/vuelogs | 基础 | `timeout` | 请求超时(秒) | | 基础 | `max_auto_continue` | 自动续写次数 | | 基础 | `max_history_messages` | 历史消息条数上限(建议改用 max_history_tokens) | -| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),参考值 120000~140000(tiktoken 与 Claude 实际 tokenizer 有差异,建议观察 UI 日志实际值后调整) | +| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),代码自动补偿 Cursor 后端开销(1,300 基础 + 工具 tokenizer 差异,动态计算),参考值 130000~170000,默认 150000 | | 功能 | `thinking.enabled` | Thinking 模式(跟随客户端/强制关闭/强制开启) | | 功能 | `sanitize_response` | 响应内容清洗 | | 历史压缩 | `compression.*` | 压缩开关、级别、保留条数等 | diff --git a/vue-ui/src/components/ConfigDrawer.vue b/vue-ui/src/components/ConfigDrawer.vue index b72d5d7..d8002c5 100644 --- a/vue-ui/src/components/ConfigDrawer.vue +++ b/vue-ui/src/components/ConfigDrawer.vue @@ -28,7 +28,7 @@ - +