From b542d554c66c3b2bf2dbafdf74dbede59470e53d Mon Sep 17 00:00:00 2001 From: huangzhenting Date: Sat, 21 Mar 2026 18:41:45 +0800 Subject: [PATCH] feat: add max_history_tokens to trim context by token budget Introduce js-tiktoken (cl100k_base) based token estimation to replace the naive chars/3 approach. Add max_history_tokens config option that trims oldest messages when the estimated token budget is exceeded. - src/tokenizer.ts: new module wrapping js-tiktoken getEncoding - src/config.ts/config-api.ts: YAML parse, env var, hot-reload, default 130000 - src/converter.ts: token budget trimming after max_history_messages pass - src/handler.ts: replace estimateInputTokens with tiktoken-based version - config.yaml.example/docker-compose.yml/README.md: docs and examples - vue-ui: ConfigDrawer field, HotConfig type, README table row --- README.md | 4 +++ config.yaml.example | 27 ++++++++++++++++- docker-compose.yml | 3 +- package-lock.json | 34 +++++++++++++++++++-- package.json | 1 + src/config-api.ts | 8 +++++ src/config.ts | 4 +++ src/converter.ts | 42 ++++++++++++++++++++++++++ src/tokenizer.ts | 19 ++++++++++++ src/types.ts | 11 ++++++- vue-ui/README.md | 3 +- vue-ui/src/components/ConfigDrawer.vue | 5 ++- vue-ui/src/types.ts | 3 ++ 13 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 src/tokenizer.ts diff --git a/README.md b/README.md index e3af166..c88fb25 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,8 @@ cp config.yaml.example config.yaml | `logging.max_days` | 日志保留天数 | `7` | | `logging.persist_mode` | 日志落盘模式:`summary` 问答摘要 / `compact` 精简 / `full` 完整 | `summary` | | `max_auto_continue` | 截断自动续写次数 (`0`=禁用,交由客户端续写) | `0` | +| `max_history_messages` | 历史消息条数上限,超出时删除最早消息(建议改用 `max_history_tokens`) | `-1`(不限制) | +| `max_history_tokens` | 历史消息 token 数上限(推荐),有助于减少超出 Cursor 上下文的概率;注意 tiktoken 低估约 10~20%,建议参考实际 UI 日志调整,参考值 `120000~140000` | `130000` | | `sanitize_response` | 响应内容清洗开关(替换 Cursor 身份引用为 Claude) | `false` | | `refusal_patterns` | 
自定义拒绝检测规则列表(追加到内置规则) | 不配置 | | `tools.passthrough` | 🆕 透传模式:跳过 few-shot 注入,原始 JSON 嵌入(Roo Code/Cline 推荐) | `false` | @@ -251,6 +253,8 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use` | `LOG_FILE_ENABLED` | 日志文件持久化 (`true`/`false`) | | `LOG_DIR` | 日志文件目录 | | `MAX_AUTO_CONTINUE` | 截断自动续写次数 (`0`=禁用) | +| `MAX_HISTORY_MESSAGES` | 历史消息条数上限(`-1`=不限制) | +| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `130000`,`-1`=不限制,参考值 `120000~140000`,tiktoken 低估约 10~20%) | | `SANITIZE_RESPONSE` | 响应内容清洗开关 (`true`/`false`,默认 `false`) | | `TOOLS_PASSTHROUGH` | 🆕 工具透传模式 (`true`/`false`,默认 `false`) | | `TOOLS_DISABLED` | 🆕 工具禁用模式 (`true`/`false`,默认 `false`) | diff --git a/config.yaml.example b/config.yaml.example index f500f9c..6328c4f 100644 --- a/config.yaml.example +++ b/config.yaml.example @@ -36,11 +36,36 @@ max_auto_continue: 0 # ==================== 历史消息条数硬限制 ==================== # 输入消息条数上限,超出时删除最早的消息(保留工具 few-shot 示例) -# 防止超长对话(800+ 条)导致请求体积过大、响应变慢 +# 注意:按条数限制无法反映实际 token 体积,建议改用 max_history_tokens(更精准) +# 如需同时设置,两者独立生效,取更严格的结果 # 设为 -1 不限制消息条数 # 环境变量: MAX_HISTORY_MESSAGES=100 max_history_messages: -1 +# ==================== 历史消息 Token 数硬限制(推荐) ==================== +# 按 js-tiktoken (cl100k_base) 估算 token 数裁剪历史,比按条数更精准 +# 能有效防止超出 Cursor API 200k 上下文上限,保障模型输出稳定 +# +# ⚠️ 注意:js-tiktoken 使用 OpenAI cl100k_base 词表估算,与 Claude 实际 tokenizer 有差异 +# 实测低估约 10%~20%,中英混合/工具调用场景差异更大 +# 建议开启后观察 UI 日志中的「↑ Cursor 输入 tokens」真实值,再据此调整 +# +# 裁剪规则: +# - 系统提示 + 工具定义的 token 优先扣除 +# - 剩余额度从最新消息往前累加,超出预算的最早消息整条删除 +# - 工具模式的 few-shot 示例(前 2 条)始终保留 +# +# 参考值:120000~140000(考虑到估算误差,需预留足够安全余量) +# Cursor API 上下文上限约 200k tokens,实际可用历史额度受系统提示和工具定义影响 +# +# 与 max_history_messages 的关系: +# 两者独立生效,若同时设置则取更严格的结果 +# 推荐:只设置 max_history_tokens,不设置 max_history_messages +# +# 设为 -1 不限制 +# 环境变量: MAX_HISTORY_TOKENS=130000 +max_history_tokens: 130000 + # ==================== Thinking 开关(最高优先级) ==================== # 控制是否向 Cursor 发送 thinking 请求,优先级高于客户端传入的 thinking 参数 # 设为 true: 强制启用 thinking(即使客户端没请求也注入) diff --git 
a/docker-compose.yml b/docker-compose.yml index 42dd1d9..4792af2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,7 +38,8 @@ services: # ── 自动续写 & 历史消息限制 ── # - MAX_AUTO_CONTINUE=0 # 截断后自动续写次数,0=禁用(默认) - # - MAX_HISTORY_MESSAGES=-1 # 历史消息条数上限,-1=不限制 + # - MAX_HISTORY_MESSAGES=-1 # 历史消息条数上限,-1=不限制(建议改用 MAX_HISTORY_TOKENS) + # - MAX_HISTORY_TOKENS=130000 # 历史消息 token 数上限(推荐),默认 130000,参考值 120000~140000(tiktoken 低估约 10~20%,建议观察 UI 日志实际值后调整) # ── 日志持久化 ── # - LOG_FILE_ENABLED=true diff --git a/package-lock.json b/package-lock.json index bc728b6..75e096d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,16 +1,17 @@ { "name": "cursor2api", - "version": "2.7.2", + "version": "2.7.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "cursor2api", - "version": "2.7.2", + "version": "2.7.6", "dependencies": { "dotenv": "^16.5.0", "eventsource-parser": "^3.0.1", "express": "^5.1.0", + "js-tiktoken": "^1.0.21", "tesseract.js": "^7.0.0", "undici": "^7.22.0", "uuid": "^11.1.0", @@ -584,6 +585,26 @@ "node": ">= 0.6" } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmmirror.com/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/bmp-js": { "version": "0.1.0", "resolved": "https://registry.npmmirror.com/bmp-js/-/bmp-js-0.1.0.tgz", @@ -1116,6 +1137,15 @@ "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", "license": "MIT" }, + "node_modules/js-tiktoken": { + "version": "1.0.21", + "resolved": "https://registry.npmmirror.com/js-tiktoken/-/js-tiktoken-1.0.21.tgz", + 
"integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==", + "license": "MIT", + "dependencies": { + "base64-js": "^1.5.1" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz", diff --git a/package.json b/package.json index 374c5aa..3838e36 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "dotenv": "^16.5.0", "eventsource-parser": "^3.0.1", "express": "^5.1.0", + "js-tiktoken": "^1.0.21", "tesseract.js": "^7.0.0", "undici": "^7.22.0", "uuid": "^11.1.0", diff --git a/src/config-api.ts b/src/config-api.ts index 081fbdc..7abfaf3 100644 --- a/src/config-api.ts +++ b/src/config-api.ts @@ -14,6 +14,7 @@ export function apiGetConfig(_req: Request, res: Response): void { timeout: cfg.timeout, max_auto_continue: cfg.maxAutoContinue, max_history_messages: cfg.maxHistoryMessages, + max_history_tokens: cfg.maxHistoryTokens, thinking: cfg.thinking !== undefined ? { enabled: cfg.thinking.enabled } : null, compression: { enabled: cfg.compression?.enabled ?? false, @@ -53,6 +54,9 @@ export function apiSaveConfig(req: Request, res: Response): void { if (body.max_history_messages !== undefined && typeof body.max_history_messages !== 'number') { res.status(400).json({ error: 'max_history_messages must be a number' }); return; } + if (body.max_history_tokens !== undefined && typeof body.max_history_tokens !== 'number') { + res.status(400).json({ error: 'max_history_tokens must be a number' }); return; + } try { // 读取现有 yaml(如不存在则从空对象开始) @@ -81,6 +85,10 @@ export function apiSaveConfig(req: Request, res: Response): void { changes.push(`max_history_messages: ${raw.max_history_messages ?? 
'(unset)'} → ${body.max_history_messages}`); raw.max_history_messages = body.max_history_messages; } + if (body.max_history_tokens !== undefined && body.max_history_tokens !== raw.max_history_tokens) { + changes.push(`max_history_tokens: ${raw.max_history_tokens ?? '(unset)'} → ${body.max_history_tokens}`); + raw.max_history_tokens = body.max_history_tokens; + } if (body.thinking !== undefined) { const t = body.thinking as { enabled: boolean | null } | null; const oldVal = JSON.stringify(raw.thinking); diff --git a/src/config.ts b/src/config.ts index 8877e68..1cffc10 100644 --- a/src/config.ts +++ b/src/config.ts @@ -37,6 +37,7 @@ function parseYamlConfig(defaults: AppConfig): { config: AppConfig; raw: Record< if (yaml.cursor_model) result.cursorModel = yaml.cursor_model; if (typeof yaml.max_auto_continue === 'number') result.maxAutoContinue = yaml.max_auto_continue; if (typeof yaml.max_history_messages === 'number') result.maxHistoryMessages = yaml.max_history_messages; + if (typeof yaml.max_history_tokens === 'number') result.maxHistoryTokens = yaml.max_history_tokens; if (yaml.fingerprint) { if (yaml.fingerprint.user_agent) result.fingerprint.userAgent = yaml.fingerprint.user_agent; } @@ -120,6 +121,7 @@ function applyEnvOverrides(cfg: AppConfig): void { if (process.env.CURSOR_MODEL) cfg.cursorModel = process.env.CURSOR_MODEL; if (process.env.MAX_AUTO_CONTINUE !== undefined) cfg.maxAutoContinue = parseInt(process.env.MAX_AUTO_CONTINUE); if (process.env.MAX_HISTORY_MESSAGES !== undefined) cfg.maxHistoryMessages = parseInt(process.env.MAX_HISTORY_MESSAGES); + if (process.env.MAX_HISTORY_TOKENS !== undefined) cfg.maxHistoryTokens = parseInt(process.env.MAX_HISTORY_TOKENS); if (process.env.AUTH_TOKEN) { cfg.authTokens = process.env.AUTH_TOKEN.split(',').map(s => s.trim()).filter(Boolean); } @@ -193,6 +195,7 @@ function defaultConfig(): AppConfig { cursorModel: 'anthropic/claude-sonnet-4.6', maxAutoContinue: 0, maxHistoryMessages: -1, + maxHistoryTokens: 130000, 
sanitizeEnabled: false, // 默认关闭响应内容清洗 fingerprint: { userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36', @@ -212,6 +215,7 @@ function detectChanges(oldCfg: AppConfig, newCfg: AppConfig): string[] { if (oldCfg.cursorModel !== newCfg.cursorModel) changes.push(`cursor_model: ${oldCfg.cursorModel} → ${newCfg.cursorModel}`); if (oldCfg.maxAutoContinue !== newCfg.maxAutoContinue) changes.push(`max_auto_continue: ${oldCfg.maxAutoContinue} → ${newCfg.maxAutoContinue}`); if (oldCfg.maxHistoryMessages !== newCfg.maxHistoryMessages) changes.push(`max_history_messages: ${oldCfg.maxHistoryMessages} → ${newCfg.maxHistoryMessages}`); + if (oldCfg.maxHistoryTokens !== newCfg.maxHistoryTokens) changes.push(`max_history_tokens: ${oldCfg.maxHistoryTokens} → ${newCfg.maxHistoryTokens}`); // auth_tokens const oldTokens = (oldCfg.authTokens || []).join(','); diff --git a/src/converter.ts b/src/converter.ts index 7eb478f..c18196f 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -24,6 +24,7 @@ import type { ParsedToolCall, } from './types.js'; import { getConfig } from './config.js'; +import { estimateTokens } from './tokenizer.js'; import { applyVisionInterceptor } from './vision.js'; import { fixToolCallArguments } from './tool-fixer.js'; import { getVisionProxyFetchOptions } from './proxy-agent.js'; @@ -675,6 +676,47 @@ I will ALWAYS use this exact \`\`\`json action\`\`\` block format for tool calls } } + // ★ 历史消息 token 数硬限制(比条数限制更精准) + // 优先扣除系统提示和工具定义的 token 占用,剩余额度从最早消息开始整条删除 + const maxHistoryTokens = config.maxHistoryTokens; + if (maxHistoryTokens >= 0) { + const fewShotOffset2 = hasTools ? 2 : 0; + + // 估算系统提示 token 数 + let overhead = 0; + if (req.system) { + const sysStr = typeof req.system === 'string' ? 
req.system : JSON.stringify(req.system); + overhead += estimateTokens(sysStr); + } + // 估算工具定义 token 数(压缩后约 70 tokens/工具 + 350 固定开销) + if (req.tools && req.tools.length > 0) { + overhead += req.tools.length * 70; + overhead += 350; + } + + const historyBudget = Math.max(0, maxHistoryTokens - overhead); + + // 从最新消息往前累加,找到超出预算的边界 + let usedTokens = 0; + let keepFrom = fewShotOffset2; + for (let i = messages.length - 1; i >= fewShotOffset2; i--) { + const msgChars = messages[i].parts.reduce((s, p) => s + (p.text?.length ?? 0), 0); + const msgTokens = estimateTokens(messages[i].parts.map(p => p.text ?? '').join('')); + if (usedTokens + msgTokens > historyBudget) { + keepFrom = i + 1; + break; + } + usedTokens += msgTokens; + keepFrom = i; + } + + if (keepFrom > fewShotOffset2) { + const removed = keepFrom - fewShotOffset2; + messages.splice(fewShotOffset2, removed); + console.log(`[Converter] token 预算裁剪: 移除最早 ${removed} 条消息,保留 ~${usedTokens} tokens (预算 ${historyBudget} tokens,系统开销 ${overhead} tokens)`); + } + } + // ★ 渐进式历史压缩(智能压缩,不破坏结构) // 可通过 config.yaml 的 compression 配置控制开关和级别 // 策略:保留最近 KEEP_RECENT 条消息完整,对早期消息进行结构感知压缩 diff --git a/src/tokenizer.ts b/src/tokenizer.ts new file mode 100644 index 0000000..3c7f772 --- /dev/null +++ b/src/tokenizer.ts @@ -0,0 +1,19 @@ +/** + * tokenizer.ts - 统一 token 估算模块 + * + * 使用 js-tiktoken 的 cl100k_base 编码器(仅为近似估算:与 Claude 实际 tokenizer 有差异,实测低估约 10%~20%) + * 纯 JS 实现,无 WASM,无网络请求,ESM 兼容 + */ + +import { getEncoding } from 'js-tiktoken'; + +const enc = getEncoding('cl100k_base'); + +/** + * 估算文本的 token 数 + * 使用 cl100k_base 编码(GPT-3.5/4 同款,与 Claude tokenizer 近似) + */ +export function estimateTokens(text: string): number { + if (!text) return 0; + return enc.encode(text).length; +} diff --git a/src/types.ts b/src/types.ts index de0a49b..39f7030 100644 --- a/src/types.ts +++ b/src/types.ts @@ -91,6 +91,14 @@ export interface CursorPart { export interface CursorSSEEvent { type: string; delta?: string; + finishReason?: string; + messageMetadata?: { + 
usage?: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + }; + }; } // ==================== Internal Types ==================== @@ -107,7 +115,8 @@ export interface AppConfig { cursorModel: string; authTokens?: string[]; // API 鉴权 token 列表,为空则不鉴权 maxAutoContinue: number; // 自动续写最大次数,默认 3,设 0 禁用 - maxHistoryMessages: number; // 历史消息条数硬限制,默认 100,-1 不限制 + maxHistoryMessages: number; // 历史消息条数硬限制,默认 -1(不限制) + maxHistoryTokens: number; // 历史消息 token 数上限(js-tiktoken 估算),默认 130000,-1 不限制 vision?: { enabled: boolean; mode: 'ocr' | 'api'; diff --git a/vue-ui/README.md b/vue-ui/README.md index 0ce502d..9a4c6cc 100644 --- a/vue-ui/README.md +++ b/vue-ui/README.md @@ -135,7 +135,8 @@ open http://localhost:3010/vuelogs | 基础 | `cursor_model` | 使用的 Cursor 模型 | | 基础 | `timeout` | 请求超时(秒) | | 基础 | `max_auto_continue` | 自动续写次数 | -| 基础 | `max_history_messages` | 历史消息条数上限 | +| 基础 | `max_history_messages` | 历史消息条数上限(建议改用 max_history_tokens) | +| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),参考值 120000~140000(tiktoken 与 Claude 实际 tokenizer 有差异,建议观察 UI 日志实际值后调整) | | 功能 | `thinking.enabled` | Thinking 模式(跟随客户端/强制关闭/强制开启) | | 功能 | `sanitize_response` | 响应内容清洗 | | 历史压缩 | `compression.*` | 压缩开关、级别、保留条数等 | diff --git a/vue-ui/src/components/ConfigDrawer.vue b/vue-ui/src/components/ConfigDrawer.vue index 4611b6e..b72d5d7 100644 --- a/vue-ui/src/components/ConfigDrawer.vue +++ b/vue-ui/src/components/ConfigDrawer.vue @@ -25,9 +25,12 @@ - + + + + diff --git a/vue-ui/src/types.ts b/vue-ui/src/types.ts index 380a80d..195fd14 100644 --- a/vue-ui/src/types.ts +++ b/vue-ui/src/types.ts @@ -49,6 +49,8 @@ export interface RequestSummary { phaseTimings: PhaseTiming[]; thinkingChars: number; systemPromptLength: number; + inputTokens?: number; + outputTokens?: number; title?: string; } @@ -66,6 +68,7 @@ export interface HotConfig { timeout: number; max_auto_continue: number; max_history_messages: number; + max_history_tokens: number; thinking: { enabled: boolean } | null; 
compression: { enabled: boolean; level: 1 | 2 | 3; keep_recent: number; early_msg_max_chars: number }; tools: { schema_mode: 'compact' | 'full' | 'names_only'; description_max_length: number; passthrough?: boolean; disabled?: boolean };