diff --git a/README.md b/README.md
index e3af166..c88fb25 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,8 @@ cp config.yaml.example config.yaml
| `logging.max_days` | 日志保留天数 | `7` |
| `logging.persist_mode` | 日志落盘模式:`summary` 问答摘要 / `compact` 精简 / `full` 完整 | `summary` |
| `max_auto_continue` | 截断自动续写次数 (`0`=禁用,交由客户端续写) | `0` |
+| `max_history_messages` | 历史消息条数上限,超出时删除最早消息(建议改用 `max_history_tokens`) | `-1`(不限制) |
+| `max_history_tokens` | 历史消息 token 数上限(推荐),有助于减少超出 Cursor 上下文的概率;注意 tiktoken 低估约 10~20%,建议参考实际 UI 日志调整,参考值 `120000~140000` | `130000` |
| `sanitize_response` | 响应内容清洗开关(替换 Cursor 身份引用为 Claude) | `false` |
| `refusal_patterns` | 自定义拒绝检测规则列表(追加到内置规则) | 不配置 |
| `tools.passthrough` | 🆕 透传模式:跳过 few-shot 注入,原始 JSON 嵌入(Roo Code/Cline 推荐) | `false` |
@@ -251,6 +253,8 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use`
| `LOG_FILE_ENABLED` | 日志文件持久化 (`true`/`false`) |
| `LOG_DIR` | 日志文件目录 |
| `MAX_AUTO_CONTINUE` | 截断自动续写次数 (`0`=禁用) |
+| `MAX_HISTORY_MESSAGES` | 历史消息条数上限(`-1`=不限制) |
+| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `130000`,`-1`=不限制,参考值 `120000~140000`,tiktoken 低估约 10~20%) |
| `SANITIZE_RESPONSE` | 响应内容清洗开关 (`true`/`false`,默认 `false`) |
| `TOOLS_PASSTHROUGH` | 🆕 工具透传模式 (`true`/`false`,默认 `false`) |
| `TOOLS_DISABLED` | 🆕 工具禁用模式 (`true`/`false`,默认 `false`) |
diff --git a/config.yaml.example b/config.yaml.example
index f500f9c..6328c4f 100644
--- a/config.yaml.example
+++ b/config.yaml.example
@@ -36,11 +36,36 @@ max_auto_continue: 0
# ==================== 历史消息条数硬限制 ====================
# 输入消息条数上限,超出时删除最早的消息(保留工具 few-shot 示例)
-# 防止超长对话(800+ 条)导致请求体积过大、响应变慢
+# 注意:按条数限制无法反映实际 token 体积,建议改用 max_history_tokens(更精准)
+# 如需同时设置,两者独立生效,取更严格的结果
# 设为 -1 不限制消息条数
# 环境变量: MAX_HISTORY_MESSAGES=100
max_history_messages: -1
+# ==================== 历史消息 Token 数硬限制(推荐) ====================
+# 按 js-tiktoken (cl100k_base) 估算 token 数裁剪历史,比按条数更精准
+# 能有效防止超出 Cursor API 200k 上下文上限,保障模型输出稳定
+#
+# ⚠️ 注意:js-tiktoken 使用 OpenAI cl100k_base 词表估算,与 Claude 实际 tokenizer 有差异
+# 实测低估约 10%~20%,中英混合/工具调用场景差异更大
+# 建议开启后观察 UI 日志中的「↑ Cursor 输入 tokens」真实值,再据此调整
+#
+# 裁剪规则:
+# - 系统提示 + 工具定义的 token 优先扣除
+# - 剩余额度从最新消息往前累加,超出预算的最早消息整条删除
+# - 工具模式的 few-shot 示例(前 2 条)始终保留
+#
+# 参考值:120000~140000(考虑到估算误差,需预留足够安全余量)
+# Cursor API 上下文上限约 200k tokens,实际可用历史额度受系统提示和工具定义影响
+#
+# 与 max_history_messages 的关系:
+# 两者独立生效,若同时设置则取更严格的结果
+# 推荐:只设置 max_history_tokens,不设置 max_history_messages
+#
+# 设为 -1 不限制
+# 环境变量: MAX_HISTORY_TOKENS=130000
+max_history_tokens: 130000
+
# ==================== Thinking 开关(最高优先级) ====================
# 控制是否向 Cursor 发送 thinking 请求,优先级高于客户端传入的 thinking 参数
# 设为 true: 强制启用 thinking(即使客户端没请求也注入)
diff --git a/docker-compose.yml b/docker-compose.yml
index 42dd1d9..4792af2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -38,7 +38,8 @@ services:
# ── 自动续写 & 历史消息限制 ──
# - MAX_AUTO_CONTINUE=0 # 截断后自动续写次数,0=禁用(默认)
- # - MAX_HISTORY_MESSAGES=-1 # 历史消息条数上限,-1=不限制
+ # - MAX_HISTORY_MESSAGES=-1 # 历史消息条数上限,-1=不限制(建议改用 MAX_HISTORY_TOKENS)
+ # - MAX_HISTORY_TOKENS=130000 # 历史消息 token 数上限(推荐),默认 130000,参考值 120000~140000(tiktoken 低估约 10~20%,建议观察 UI 日志实际值后调整)
# ── 日志持久化 ──
# - LOG_FILE_ENABLED=true
diff --git a/package-lock.json b/package-lock.json
index bc728b6..75e096d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,16 +1,17 @@
{
"name": "cursor2api",
- "version": "2.7.2",
+ "version": "2.7.6",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "cursor2api",
- "version": "2.7.2",
+ "version": "2.7.6",
"dependencies": {
"dotenv": "^16.5.0",
"eventsource-parser": "^3.0.1",
"express": "^5.1.0",
+ "js-tiktoken": "^1.0.21",
"tesseract.js": "^7.0.0",
"undici": "^7.22.0",
"uuid": "^11.1.0",
@@ -584,6 +585,26 @@
"node": ">= 0.6"
}
},
+ "node_modules/base64-js": {
+ "version": "1.5.1",
+ "resolved": "https://registry.npmmirror.com/base64-js/-/base64-js-1.5.1.tgz",
+ "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/feross"
+ },
+ {
+ "type": "patreon",
+ "url": "https://www.patreon.com/feross"
+ },
+ {
+ "type": "consulting",
+ "url": "https://feross.org/support"
+ }
+ ],
+ "license": "MIT"
+ },
"node_modules/bmp-js": {
"version": "0.1.0",
"resolved": "https://registry.npmmirror.com/bmp-js/-/bmp-js-0.1.0.tgz",
@@ -1116,6 +1137,15 @@
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
"license": "MIT"
},
+ "node_modules/js-tiktoken": {
+ "version": "1.0.21",
+ "resolved": "https://registry.npmmirror.com/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
+ "integrity": "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==",
+ "license": "MIT",
+ "dependencies": {
+ "base64-js": "^1.5.1"
+ }
+ },
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
diff --git a/package.json b/package.json
index 374c5aa..3838e36 100644
--- a/package.json
+++ b/package.json
@@ -24,6 +24,7 @@
"dotenv": "^16.5.0",
"eventsource-parser": "^3.0.1",
"express": "^5.1.0",
+ "js-tiktoken": "^1.0.21",
"tesseract.js": "^7.0.0",
"undici": "^7.22.0",
"uuid": "^11.1.0",
diff --git a/src/config-api.ts b/src/config-api.ts
index 081fbdc..7abfaf3 100644
--- a/src/config-api.ts
+++ b/src/config-api.ts
@@ -14,6 +14,7 @@ export function apiGetConfig(_req: Request, res: Response): void {
timeout: cfg.timeout,
max_auto_continue: cfg.maxAutoContinue,
max_history_messages: cfg.maxHistoryMessages,
+ max_history_tokens: cfg.maxHistoryTokens,
thinking: cfg.thinking !== undefined ? { enabled: cfg.thinking.enabled } : null,
compression: {
enabled: cfg.compression?.enabled ?? false,
@@ -53,6 +54,9 @@ export function apiSaveConfig(req: Request, res: Response): void {
if (body.max_history_messages !== undefined && typeof body.max_history_messages !== 'number') {
res.status(400).json({ error: 'max_history_messages must be a number' }); return;
}
+ if (body.max_history_tokens !== undefined && typeof body.max_history_tokens !== 'number') {
+ res.status(400).json({ error: 'max_history_tokens must be a number' }); return;
+ }
try {
// 读取现有 yaml(如不存在则从空对象开始)
@@ -81,6 +85,10 @@ export function apiSaveConfig(req: Request, res: Response): void {
changes.push(`max_history_messages: ${raw.max_history_messages ?? '(unset)'} → ${body.max_history_messages}`);
raw.max_history_messages = body.max_history_messages;
}
+ if (body.max_history_tokens !== undefined && body.max_history_tokens !== raw.max_history_tokens) {
+ changes.push(`max_history_tokens: ${raw.max_history_tokens ?? '(unset)'} → ${body.max_history_tokens}`);
+ raw.max_history_tokens = body.max_history_tokens;
+ }
if (body.thinking !== undefined) {
const t = body.thinking as { enabled: boolean | null } | null;
const oldVal = JSON.stringify(raw.thinking);
diff --git a/src/config.ts b/src/config.ts
index 8877e68..1cffc10 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -37,6 +37,7 @@ function parseYamlConfig(defaults: AppConfig): { config: AppConfig; raw: Record<
if (yaml.cursor_model) result.cursorModel = yaml.cursor_model;
if (typeof yaml.max_auto_continue === 'number') result.maxAutoContinue = yaml.max_auto_continue;
if (typeof yaml.max_history_messages === 'number') result.maxHistoryMessages = yaml.max_history_messages;
+ if (typeof yaml.max_history_tokens === 'number') result.maxHistoryTokens = yaml.max_history_tokens;
if (yaml.fingerprint) {
if (yaml.fingerprint.user_agent) result.fingerprint.userAgent = yaml.fingerprint.user_agent;
}
@@ -120,6 +121,7 @@ function applyEnvOverrides(cfg: AppConfig): void {
if (process.env.CURSOR_MODEL) cfg.cursorModel = process.env.CURSOR_MODEL;
if (process.env.MAX_AUTO_CONTINUE !== undefined) cfg.maxAutoContinue = parseInt(process.env.MAX_AUTO_CONTINUE);
if (process.env.MAX_HISTORY_MESSAGES !== undefined) cfg.maxHistoryMessages = parseInt(process.env.MAX_HISTORY_MESSAGES);
+ if (process.env.MAX_HISTORY_TOKENS !== undefined) cfg.maxHistoryTokens = parseInt(process.env.MAX_HISTORY_TOKENS);
if (process.env.AUTH_TOKEN) {
cfg.authTokens = process.env.AUTH_TOKEN.split(',').map(s => s.trim()).filter(Boolean);
}
@@ -193,6 +195,7 @@ function defaultConfig(): AppConfig {
cursorModel: 'anthropic/claude-sonnet-4.6',
maxAutoContinue: 0,
maxHistoryMessages: -1,
+ maxHistoryTokens: 130000,
sanitizeEnabled: false, // 默认关闭响应内容清洗
fingerprint: {
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
@@ -212,6 +215,7 @@ function detectChanges(oldCfg: AppConfig, newCfg: AppConfig): string[] {
if (oldCfg.cursorModel !== newCfg.cursorModel) changes.push(`cursor_model: ${oldCfg.cursorModel} → ${newCfg.cursorModel}`);
if (oldCfg.maxAutoContinue !== newCfg.maxAutoContinue) changes.push(`max_auto_continue: ${oldCfg.maxAutoContinue} → ${newCfg.maxAutoContinue}`);
if (oldCfg.maxHistoryMessages !== newCfg.maxHistoryMessages) changes.push(`max_history_messages: ${oldCfg.maxHistoryMessages} → ${newCfg.maxHistoryMessages}`);
+ if (oldCfg.maxHistoryTokens !== newCfg.maxHistoryTokens) changes.push(`max_history_tokens: ${oldCfg.maxHistoryTokens} → ${newCfg.maxHistoryTokens}`);
// auth_tokens
const oldTokens = (oldCfg.authTokens || []).join(',');
diff --git a/src/converter.ts b/src/converter.ts
index 7eb478f..c18196f 100644
--- a/src/converter.ts
+++ b/src/converter.ts
@@ -24,6 +24,7 @@ import type {
ParsedToolCall,
} from './types.js';
import { getConfig } from './config.js';
+import { estimateTokens } from './tokenizer.js';
import { applyVisionInterceptor } from './vision.js';
import { fixToolCallArguments } from './tool-fixer.js';
import { getVisionProxyFetchOptions } from './proxy-agent.js';
@@ -675,6 +676,47 @@ I will ALWAYS use this exact \`\`\`json action\`\`\` block format for tool calls
}
}
+ // ★ 历史消息 token 数硬限制(比条数限制更精准)
+ // 优先扣除系统提示和工具定义的 token 占用;剩余预算从最新消息往前累加,放不下的最早消息整条删除
+ const maxHistoryTokens = config.maxHistoryTokens;
+ if (maxHistoryTokens >= 0) {
+ const fewShotOffset2 = hasTools ? 2 : 0;
+
+ // 估算系统提示 token 数
+ let overhead = 0;
+ if (req.system) {
+ const sysStr = typeof req.system === 'string' ? req.system : JSON.stringify(req.system);
+ overhead += estimateTokens(sysStr);
+ }
+ // 估算工具定义 token 数(压缩后约 70 tokens/工具 + 350 固定开销)
+ if (req.tools && req.tools.length > 0) {
+ overhead += req.tools.length * 70;
+ overhead += 350;
+ }
+
+ const historyBudget = Math.max(0, maxHistoryTokens - overhead);
+
+ // 从最新消息往前累加,找到超出预算的边界
+ let usedTokens = 0;
+ let keepFrom = fewShotOffset2;
+ for (let i = messages.length - 1; i >= fewShotOffset2; i--) {
+ // Walk from newest to oldest; the newest message is always kept, even when it alone exceeds the budget, so the splice below can never leave only the few-shot examples.
+ const msgTokens = estimateTokens(messages[i].parts.map(p => p.text ?? '').join(''));
+ if (usedTokens + msgTokens > historyBudget && i < messages.length - 1) {
+ keepFrom = i + 1;
+ break;
+ }
+ usedTokens += msgTokens;
+ keepFrom = i;
+ }
+
+ if (keepFrom > fewShotOffset2) {
+ const removed = keepFrom - fewShotOffset2;
+ messages.splice(fewShotOffset2, removed);
+ console.log(`[Converter] token 预算裁剪: 移除最早 ${removed} 条消息,保留 ~${usedTokens} tokens (预算 ${historyBudget} tokens,系统开销 ${overhead} tokens)`);
+ }
+ }
+
// ★ 渐进式历史压缩(智能压缩,不破坏结构)
// 可通过 config.yaml 的 compression 配置控制开关和级别
// 策略:保留最近 KEEP_RECENT 条消息完整,对早期消息进行结构感知压缩
diff --git a/src/tokenizer.ts b/src/tokenizer.ts
new file mode 100644
index 0000000..3c7f772
--- /dev/null
+++ b/src/tokenizer.ts
@@ -0,0 +1,19 @@
+/**
+ * tokenizer.ts - 统一 token 估算模块
+ *
+ * 使用 js-tiktoken 的 cl100k_base 编码器(OpenAI 词表,与 Claude 实际 tokenizer 有差异,实测低估约 10%~20%,仅作估算)
+ * 纯 JS 实现,无 WASM,无网络请求,ESM 兼容
+ */
+
+import { getEncoding } from 'js-tiktoken';
+
+const enc = getEncoding('cl100k_base');
+
+/**
+ * Estimates the token count of `text` by encoding it with the module-level cl100k_base encoder.
+ * Returns 0 for an empty string; the result is an approximation, not Claude's own tokenizer count.
+ */
+export function estimateTokens(text: string): number {
+ if (!text) return 0;
+ return enc.encode(text).length;
+}
diff --git a/src/types.ts b/src/types.ts
index de0a49b..39f7030 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -91,6 +91,14 @@ export interface CursorPart {
export interface CursorSSEEvent {
type: string;
delta?: string;
+ finishReason?: string;
+ messageMetadata?: {
+ usage?: {
+ inputTokens?: number;
+ outputTokens?: number;
+ totalTokens?: number;
+ };
+ };
}
// ==================== Internal Types ====================
@@ -107,7 +115,8 @@ export interface AppConfig {
cursorModel: string;
authTokens?: string[]; // API 鉴权 token 列表,为空则不鉴权
maxAutoContinue: number; // 自动续写最大次数,默认 3,设 0 禁用
- maxHistoryMessages: number; // 历史消息条数硬限制,默认 100,-1 不限制
+ maxHistoryMessages: number; // 历史消息条数硬限制,默认 -1(不限制)
+ maxHistoryTokens: number; // 历史消息 token 数上限(js-tiktoken 估算),默认 130000,-1 不限制
vision?: {
enabled: boolean;
mode: 'ocr' | 'api';
diff --git a/vue-ui/README.md b/vue-ui/README.md
index 0ce502d..9a4c6cc 100644
--- a/vue-ui/README.md
+++ b/vue-ui/README.md
@@ -135,7 +135,8 @@ open http://localhost:3010/vuelogs
| 基础 | `cursor_model` | 使用的 Cursor 模型 |
| 基础 | `timeout` | 请求超时(秒) |
| 基础 | `max_auto_continue` | 自动续写次数 |
-| 基础 | `max_history_messages` | 历史消息条数上限 |
+| 基础 | `max_history_messages` | 历史消息条数上限(建议改用 max_history_tokens) |
+| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),参考值 120000~140000(tiktoken 与 Claude 实际 tokenizer 有差异,建议观察 UI 日志实际值后调整) |
| 功能 | `thinking.enabled` | Thinking 模式(跟随客户端/强制关闭/强制开启) |
| 功能 | `sanitize_response` | 响应内容清洗 |
| 历史压缩 | `compression.*` | 压缩开关、级别、保留条数等 |
diff --git a/vue-ui/src/components/ConfigDrawer.vue b/vue-ui/src/components/ConfigDrawer.vue
index 4611b6e..b72d5d7 100644
--- a/vue-ui/src/components/ConfigDrawer.vue
+++ b/vue-ui/src/components/ConfigDrawer.vue
@@ -25,9 +25,12 @@
-
+
+
+
+
diff --git a/vue-ui/src/types.ts b/vue-ui/src/types.ts
index 380a80d..195fd14 100644
--- a/vue-ui/src/types.ts
+++ b/vue-ui/src/types.ts
@@ -49,6 +49,8 @@ export interface RequestSummary {
phaseTimings: PhaseTiming[];
thinkingChars: number;
systemPromptLength: number;
+ inputTokens?: number;
+ outputTokens?: number;
title?: string;
}
@@ -66,6 +68,7 @@ export interface HotConfig {
timeout: number;
max_auto_continue: number;
max_history_messages: number;
+ max_history_tokens: number;
thinking: { enabled: boolean } | null;
compression: { enabled: boolean; level: 1 | 2 | 3; keep_recent: number; early_msg_max_chars: number };
tools: { schema_mode: 'compact' | 'full' | 'names_only'; description_max_length: number; passthrough?: boolean; disabled?: boolean };