mirror of
https://github.com/7836246/cursor2api.git
synced 2026-05-07 22:27:15 +08:00
Merge pull request #88 from huangzt/feature/vue-logs-ui
fix: 优化 token 预算精度,新增 TokenDiff 日志对比功能
This commit is contained in:
@@ -21,6 +21,9 @@ WORKDIR /app
|
||||
# 设置为生产环境
|
||||
ENV NODE_ENV=production
|
||||
|
||||
# 增大 Node.js 堆内存上限,防止日志文件过大时加载 OOM(tesseract.js / js-tiktoken 初始化也有一定内存需求)
|
||||
ENV NODE_OPTIONS="--max-old-space-size=4096"
|
||||
|
||||
# 出于安全考虑,避免使用 root 用户运行服务
|
||||
RUN addgroup --system --gid 1001 nodejs && \
|
||||
adduser --system --uid 1001 cursor
|
||||
|
||||
@@ -83,7 +83,7 @@ cp config.yaml.example config.yaml
|
||||
| `logging.persist_mode` | 日志落盘模式:`summary` 问答摘要 / `compact` 精简 / `full` 完整 | `summary` |
|
||||
| `max_auto_continue` | 截断自动续写次数 (`0`=禁用,交由客户端续写) | `0` |
|
||||
| `max_history_messages` | 历史消息条数上限,超出时删除最早消息(建议改用 `max_history_tokens`) | `-1`(不限制) |
|
||||
| `max_history_tokens` | 历史消息 token 数上限(推荐),有助于减少超出 Cursor 上下文的概率;注意 tiktoken 低估约 10~20%,建议参考实际 UI 日志调整,参考值 `120000~140000` | `130000` |
|
||||
| `max_history_tokens` | 历史消息 token 数上限(推荐),代码自动补偿 Cursor 后端开销(1,300 基础 + 工具 tokenizer 差异),参考值 `130000~170000` | `150000` |
|
||||
| `sanitize_response` | 响应内容清洗开关(替换 Cursor 身份引用为 Claude) | `false` |
|
||||
| `refusal_patterns` | 自定义拒绝检测规则列表(追加到内置规则) | 不配置 |
|
||||
| `tools.passthrough` | 🆕 透传模式:跳过 few-shot 注入,原始 JSON 嵌入(Roo Code/Cline 推荐) | `false` |
|
||||
@@ -243,6 +243,8 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use`
|
||||
|
||||
| 环境变量 | 说明 |
|
||||
|----------|------|
|
||||
> ⚠️ **环境变量优先级高于 `config.yaml`**:若在 docker-compose 等环境中设置了环境变量,该参数的 `config.yaml` 配置会被覆盖,热重载对其**无效**。需要通过 `config.yaml` 动态调整的参数,请勿同时在环境变量中设置。
|
||||
|
||||
| `PORT` | 服务端口 |
|
||||
| `AUTH_TOKEN` | API 鉴权 token(逗号分隔多个) |
|
||||
| `PROXY` | 全局代理地址 |
|
||||
@@ -254,7 +256,7 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use`
|
||||
| `LOG_DIR` | 日志文件目录 |
|
||||
| `MAX_AUTO_CONTINUE` | 截断自动续写次数 (`0`=禁用) |
|
||||
| `MAX_HISTORY_MESSAGES` | 历史消息条数上限(`-1`=不限制) |
|
||||
| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `130000`,`-1`=不限制,参考值 `120000~140000`,tiktoken 低估约 10~20%) |
|
||||
| `MAX_HISTORY_TOKENS` | 历史消息 token 数上限(默认 `150000`,`-1`=不限制,参考值 `130000~170000`,代码自动补偿 Cursor 后端开销) |
|
||||
| `SANITIZE_RESPONSE` | 响应内容清洗开关 (`true`/`false`,默认 `false`) |
|
||||
| `TOOLS_PASSTHROUGH` | 🆕 工具透传模式 (`true`/`false`,默认 `false`) |
|
||||
| `TOOLS_DISABLED` | 🆕 工具禁用模式 (`true`/`false`,默认 `false`) |
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
# Cursor2API v2 配置文件
|
||||
# 复制此文件为 config.yaml 并根据需要修改
|
||||
#
|
||||
# ⚠️ 环境变量优先级高于此文件:
|
||||
# 若通过环境变量(如 docker-compose 的 environment 块)设置了某个参数,
|
||||
# 则修改此文件对该参数无效,热重载也不会生效。
|
||||
# 需要在 config.yaml 中管理的参数,请勿同时在环境变量中设置。
|
||||
|
||||
# 服务端口
|
||||
port: 3010
|
||||
@@ -46,25 +51,27 @@ max_history_messages: -1
|
||||
# 按 js-tiktoken (cl100k_base) 估算 token 数裁剪历史,比按条数更精准
|
||||
# 能有效防止超出 Cursor API 200k 上下文上限,保障模型输出稳定
|
||||
#
|
||||
# ⚠️ 注意:js-tiktoken 使用 OpenAI cl100k_base 词表估算,与 Claude 实际 tokenizer 有差异
|
||||
# 实测低估约 10%~20%,中英混合/工具调用场景差异更大
|
||||
# 建议开启后观察 UI 日志中的「↑ Cursor 输入 tokens」真实值,再据此调整
|
||||
# 说明:此值仅计算我们发送的消息内容 token
|
||||
# 代码会自动额外补偿 Cursor 后端开销(动态计算):
|
||||
# - 基础隐藏系统提示:约 1,300 tokens(固定)
|
||||
# - 工具 tokenizer 差异:compact ~20/工具,full ~240/工具,names_only ~5/工具
|
||||
# 输出空间不在此预留,由用户自行通过此值控制(建议留 16,000~32,000 余量)
|
||||
#
|
||||
# 裁剪规则:
|
||||
# - 系统提示 + 工具定义的 token 优先扣除
|
||||
# - 系统提示 + 工具定义的 token 优先扣除(含上述固定开销)
|
||||
# - 剩余额度从最新消息往前累加,超出预算的最早消息整条删除
|
||||
# - 工具模式的 few-shot 示例(前 2 条)始终保留
|
||||
#
|
||||
# 参考值:120000~140000(考虑到估算误差,需预留足够安全余量)
|
||||
# Cursor API 上下文上限约 200k tokens,实际可用历史额度受系统提示和工具定义影响
|
||||
# 参考值:130000~170000,默认 150000
|
||||
# Cursor API 上下文上限约 200k tokens,建议 max_history_tokens + 开销 + 预留输出 ≤ 200000
|
||||
#
|
||||
# 与 max_history_messages 的关系:
|
||||
# 两者独立生效,若同时设置则取更严格的结果
|
||||
# 推荐:只设置 max_history_tokens,不设置 max_history_messages
|
||||
#
|
||||
# 设为 -1 不限制
|
||||
# 环境变量: MAX_HISTORY_TOKENS=130000
|
||||
max_history_tokens: 130000
|
||||
# 环境变量: MAX_HISTORY_TOKENS=150000
|
||||
max_history_tokens: 150000
|
||||
|
||||
# ==================== Thinking 开关(最高优先级) ====================
|
||||
# 控制是否向 Cursor 发送 thinking 请求,优先级高于客户端传入的 thinking 参数
|
||||
|
||||
@@ -39,7 +39,7 @@ services:
|
||||
# ── 自动续写 & 历史消息限制 ──
|
||||
# - MAX_AUTO_CONTINUE=0 # 截断后自动续写次数,0=禁用(默认)
|
||||
# - MAX_HISTORY_MESSAGES=-1 # 历史消息条数上限,-1=不限制(建议改用 MAX_HISTORY_TOKENS)
|
||||
# - MAX_HISTORY_TOKENS=130000 # 历史消息 token 数上限(推荐),默认 130000,参考值 120000~140000(tiktoken 低估约 10~20%,建议观察 UI 日志实际值后调整)
|
||||
# - MAX_HISTORY_TOKENS=150000 # 历史消息 token 数上限(推荐),默认 150000,参考值 130000~170000(代码自动补偿 Cursor 后端开销)
|
||||
|
||||
# ── 日志持久化 ──
|
||||
# - LOG_FILE_ENABLED=true
|
||||
|
||||
@@ -195,7 +195,7 @@ function defaultConfig(): AppConfig {
|
||||
cursorModel: 'anthropic/claude-sonnet-4.6',
|
||||
maxAutoContinue: 0,
|
||||
maxHistoryMessages: -1,
|
||||
maxHistoryTokens: 130000,
|
||||
maxHistoryTokens: 150000,
|
||||
sanitizeEnabled: false, // 默认关闭响应内容清洗
|
||||
fingerprint: {
|
||||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36',
|
||||
|
||||
@@ -682,17 +682,21 @@ I will ALWAYS use this exact \`\`\`json action\`\`\` block format for tool calls
|
||||
if (maxHistoryTokens >= 0) {
|
||||
const fewShotOffset2 = hasTools ? 2 : 0;
|
||||
|
||||
// 估算系统提示 token 数
|
||||
// 直接对已构建的 few-shot 消息(系统提示+工具定义+few-shot回复)调用 estimateTokens
|
||||
// 比 tools.length*70+350 更准确,因为实际注入文字已经在 messages[0..fewShotOffset2-1] 中
|
||||
let overhead = 0;
|
||||
if (req.system) {
|
||||
const sysStr = typeof req.system === 'string' ? req.system : JSON.stringify(req.system);
|
||||
overhead += estimateTokens(sysStr);
|
||||
}
|
||||
// 估算工具定义 token 数(压缩后约 70 tokens/工具 + 350 固定开销)
|
||||
if (req.tools && req.tools.length > 0) {
|
||||
overhead += req.tools.length * 70;
|
||||
overhead += 350;
|
||||
for (let i = 0; i < fewShotOffset2; i++) {
|
||||
overhead += estimateTokens(messages[i].parts.map(p => p.text ?? '').join(''));
|
||||
}
|
||||
// Cursor 后端额外开销:基础隐藏系统提示(实测约 1300 tokens)+ 工具 tokenizer 差异
|
||||
// 注意:工具定义已通过 buildToolInstructions 转为文本注入 messages[0],并已在上方 estimateTokens 中计算
|
||||
// Cursor 后端对工具的额外 tokenizer 差异与 schema_mode 强相关:
|
||||
// compact模式 ~20 tokens/工具,full模式 ~240 tokens/工具,names_only ~5 tokens/工具
|
||||
// 输出空间不在此预留,由用户通过 max_history_tokens 自行控制
|
||||
const toolCount = req.tools?.length ?? 0;
|
||||
const schemaMode = getConfig().tools?.schemaMode ?? 'compact';
|
||||
const perToolOverhead = schemaMode === 'full' ? 240 : (schemaMode === 'names_only' ? 5 : 20);
|
||||
overhead += 1300 + toolCount * perToolOverhead;
|
||||
|
||||
const historyBudget = Math.max(0, maxHistoryTokens - overhead);
|
||||
|
||||
|
||||
@@ -247,14 +247,18 @@ async function sendCursorRequestInner(
|
||||
}
|
||||
|
||||
/**
|
||||
* 发送非流式请求,收集完整响应
|
||||
* 发送非流式请求,收集完整响应及 usage 信息
|
||||
*/
|
||||
export async function sendCursorRequestFull(req: CursorChatRequest): Promise<string> {
|
||||
export async function sendCursorRequestFull(req: CursorChatRequest): Promise<{ text: string; usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number } }> {
|
||||
let fullText = '';
|
||||
let usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined;
|
||||
await sendCursorRequest(req, (event) => {
|
||||
if (event.type === 'text-delta' && event.delta) {
|
||||
fullText += event.delta;
|
||||
}
|
||||
if (event.messageMetadata?.usage) {
|
||||
usage = event.messageMetadata.usage;
|
||||
}
|
||||
});
|
||||
return fullText;
|
||||
return { text: fullText, usage };
|
||||
}
|
||||
|
||||
@@ -97,6 +97,19 @@ export function listModels(_req: Request, res: Response): void {
|
||||
|
||||
// ==================== Token 计数 ====================
|
||||
|
||||
/**
 * Estimates the token count of the complete message payload actually sent to
 * Cursor, so the figure can be compared against the usage value Cursor returns.
 *
 * @param cursorReq - The Cursor request whose message parts are measured.
 * @returns The sum of `estimateTokens` over the text of every part of every
 *          message; a part without text is measured as an empty string.
 */
export function estimateCursorReqTokens(cursorReq: CursorChatRequest): number {
    return cursorReq.messages.reduce(
        (runningTotal, message) =>
            message.parts.reduce(
                (subtotal, part) => subtotal + estimateTokens(part.text ?? ''),
                runningTotal,
            ),
        0,
    );
}
|
||||
|
||||
export function estimateInputTokens(body: AnthropicRequest): number {
|
||||
let total = 0;
|
||||
|
||||
@@ -479,6 +492,8 @@ function toolCallNeedsMoreContinuation(toolCall: ParsedToolCall): boolean {
|
||||
*/
|
||||
export function shouldAutoContinueTruncatedToolResponse(text: string, hasTools: boolean): boolean {
|
||||
if (!hasTools || !isTruncated(text)) return false;
|
||||
// 响应过短(< 200 chars)时不触发续写:上下文不足会导致模型拒绝或错误续写
|
||||
if (text.trim().length < 200) return false;
|
||||
if (!hasToolCalls(text)) return true;
|
||||
|
||||
const { toolCalls } = parseToolCalls(text);
|
||||
@@ -677,7 +692,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
|
||||
],
|
||||
};
|
||||
|
||||
const continuationResponse = await sendCursorRequestFull(continuationReq);
|
||||
const { text: continuationResponse } = await sendCursorRequestFull(continuationReq);
|
||||
if (continuationResponse.trim().length === 0) break;
|
||||
|
||||
const deduped = deduplicateContinuation(fullText, continuationResponse);
|
||||
@@ -1005,9 +1020,12 @@ async function handleDirectTextStream(
|
||||
? sanitizeResponse(finalVisibleText)
|
||||
: finalTextToSend;
|
||||
log.recordFinalResponse(finalRecordedResponse);
|
||||
const estimatedInput1 = estimateCursorReqTokens(activeCursorReq);
|
||||
const actualInput1 = cursorUsage?.inputTokens;
|
||||
console.log(`[TokenDiff] 流式(无工具) 估算(我们发的)=${estimatedInput1} Cursor实际=${actualInput1 ?? 'N/A'} Cursor隐藏开销=${actualInput1 != null ? (actualInput1 - estimatedInput1) : 'N/A'}`);
|
||||
log.updateSummary({
|
||||
inputTokens: cursorUsage?.inputTokens ?? estimateInputTokens(body),
|
||||
outputTokens: cursorUsage?.outputTokens ?? estimateTokens(finalRecordedResponse),
|
||||
inputTokens: cursorUsage?.inputTokens,
|
||||
outputTokens: cursorUsage?.outputTokens,
|
||||
});
|
||||
log.complete(finalRecordedResponse.length, 'end_turn');
|
||||
|
||||
@@ -1658,9 +1676,12 @@ Please go ahead and pick the most appropriate tool for the current task and outp
|
||||
|
||||
// ★ 记录完成
|
||||
log.recordFinalResponse(fullResponse);
|
||||
const estimatedInput2 = estimateCursorReqTokens(activeCursorReq);
|
||||
const actualInput2 = cursorUsage?.inputTokens;
|
||||
console.log(`[TokenDiff] 流式(有工具) 估算(我们发的)=${estimatedInput2} Cursor实际=${actualInput2 ?? 'N/A'} Cursor隐藏开销=${actualInput2 != null ? (actualInput2 - estimatedInput2) : 'N/A'}`);
|
||||
log.updateSummary({
|
||||
inputTokens: cursorUsage?.inputTokens ?? estimateInputTokens(body),
|
||||
outputTokens: cursorUsage?.outputTokens ?? estimateTokens(fullResponse),
|
||||
inputTokens: cursorUsage?.inputTokens,
|
||||
outputTokens: cursorUsage?.outputTokens,
|
||||
});
|
||||
log.complete(fullResponse.length, stopReason);
|
||||
|
||||
@@ -1695,7 +1716,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
|
||||
try {
|
||||
log.startPhase('send', '发送到 Cursor (非流式)');
|
||||
const apiStart = Date.now();
|
||||
let fullText = await sendCursorRequestFull(cursorReq);
|
||||
let { text: fullText, usage: cursorUsage } = await sendCursorRequestFull(cursorReq);
|
||||
log.recordTTFT();
|
||||
log.recordCursorApiTime(apiStart);
|
||||
log.recordRawResponse(fullText);
|
||||
@@ -1738,7 +1759,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
|
||||
log.updateSummary({ retryCount });
|
||||
const retryBody = buildRetryRequest(body, attempt);
|
||||
activeCursorReq = await convertToCursorRequest(retryBody);
|
||||
fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
({ text: fullText, usage: cursorUsage } = await sendCursorRequestFull(activeCursorReq));
|
||||
// 重试后也需要剥离 thinking 标签
|
||||
if (hasLeadingThinking(fullText)) {
|
||||
const { thinkingContent: retryThinking, strippedText: retryStripped } = extractThinking(fullText);
|
||||
@@ -1768,7 +1789,7 @@ async function handleNonStream(res: Response, cursorReq: CursorChatRequest, body
|
||||
retryCount++;
|
||||
log.warn('Handler', 'retry', `非流式响应过短 (${fullText.length} chars),重试第${retryCount}次`);
|
||||
activeCursorReq = await convertToCursorRequest(body);
|
||||
fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
({ text: fullText, usage: cursorUsage } = await sendCursorRequestFull(activeCursorReq));
|
||||
log.info('Handler', 'retry', `非流式重试响应: ${fullText.length} chars`, { preview: fullText.substring(0, 200) });
|
||||
}
|
||||
|
||||
@@ -1813,7 +1834,7 @@ Continue EXACTLY from where you stopped. DO NOT repeat any content already gener
|
||||
],
|
||||
};
|
||||
|
||||
const continuationResponse = await sendCursorRequestFull(continuationReq);
|
||||
const { text: continuationResponse } = await sendCursorRequestFull(continuationReq);
|
||||
|
||||
if (continuationResponse.trim().length === 0) {
|
||||
log.warn('Handler', 'continuation', '非流式续写返回空响应,停止续写');
|
||||
@@ -1919,7 +1940,7 @@ Please go ahead and pick the most appropriate tool for the current task and outp
|
||||
},
|
||||
];
|
||||
activeCursorReq = { ...activeCursorReq, messages: forceMessages };
|
||||
fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
({ text: fullText } = await sendCursorRequestFull(activeCursorReq));
|
||||
({ toolCalls, cleanText } = parseToolCalls(fullText));
|
||||
}
|
||||
if (toolChoice?.type === 'any' && toolCalls.length === 0) {
|
||||
@@ -1983,7 +2004,10 @@ Please go ahead and pick the most appropriate tool for the current task and outp
|
||||
|
||||
// ★ 记录完成
|
||||
log.recordFinalResponse(fullText);
|
||||
log.updateSummary({ inputTokens: estimateInputTokens(body), outputTokens: estimateTokens(fullText) });
|
||||
const estimatedInput = estimateCursorReqTokens(activeCursorReq);
|
||||
const actualInput = cursorUsage?.inputTokens;
|
||||
console.log(`[TokenDiff] 非流式 估算(我们发的)=${estimatedInput} Cursor实际=${actualInput ?? 'N/A'} Cursor隐藏开销=${actualInput != null ? (actualInput - estimatedInput) : 'N/A'}`);
|
||||
log.updateSummary({ inputTokens: cursorUsage?.inputTokens, outputTokens: cursorUsage?.outputTokens });
|
||||
log.complete(fullText.length, stopReason);
|
||||
|
||||
} catch (err: unknown) {
|
||||
|
||||
@@ -1134,7 +1134,7 @@ async function handleOpenAINonStream(
|
||||
log: RequestLogger,
|
||||
): Promise<void> {
|
||||
let activeCursorReq = cursorReq;
|
||||
let fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
let fullText = (await sendCursorRequestFull(activeCursorReq)).text;
|
||||
const hasTools = (body.tools?.length ?? 0) > 0;
|
||||
|
||||
// 日志记录在详细日志中
|
||||
@@ -1162,7 +1162,7 @@ async function handleOpenAINonStream(
|
||||
const retryBody = buildRetryRequest(anthropicReq, attempt);
|
||||
const retryCursorReq = await convertToCursorRequest(retryBody);
|
||||
activeCursorReq = retryCursorReq;
|
||||
fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
fullText = (await sendCursorRequestFull(activeCursorReq)).text;
|
||||
// 重试响应也需要先剥离 thinking
|
||||
if (hasLeadingThinking(fullText)) {
|
||||
fullText = extractThinking(fullText).strippedText;
|
||||
@@ -1775,7 +1775,7 @@ async function handleResponsesNonStream(
|
||||
log: RequestLogger,
|
||||
): Promise<void> {
|
||||
let activeCursorReq = cursorReq;
|
||||
let fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
let fullText = (await sendCursorRequestFull(activeCursorReq)).text;
|
||||
const hasTools = (anthropicReq.tools?.length ?? 0) > 0;
|
||||
|
||||
// Thinking 提取
|
||||
@@ -1790,7 +1790,7 @@ async function handleResponsesNonStream(
|
||||
const retryBody = buildRetryRequest(anthropicReq, attempt);
|
||||
const retryCursorReq = await convertToCursorRequest(retryBody);
|
||||
activeCursorReq = retryCursorReq;
|
||||
fullText = await sendCursorRequestFull(activeCursorReq);
|
||||
fullText = (await sendCursorRequestFull(activeCursorReq)).text;
|
||||
if (hasLeadingThinking(fullText)) {
|
||||
fullText = extractThinking(fullText).strippedText;
|
||||
}
|
||||
|
||||
@@ -116,7 +116,7 @@ export interface AppConfig {
|
||||
authTokens?: string[]; // API 鉴权 token 列表,为空则不鉴权
|
||||
maxAutoContinue: number; // 自动续写最大次数,默认 0(禁用)
|
||||
maxHistoryMessages: number; // 历史消息条数硬限制,默认 -1(不限制)
|
||||
maxHistoryTokens: number; // 历史消息 token 数上限(js-tiktoken 估算),默认 130000,-1 不限制
|
||||
maxHistoryTokens: number; // 历史消息 token 数上限(tiktoken 估算我们发出的内容,代码自动加 Cursor 后端开销:1300 基础 + perTool*工具数),默认 150000,-1 不限制
|
||||
vision?: {
|
||||
enabled: boolean;
|
||||
mode: 'ocr' | 'api';
|
||||
|
||||
@@ -136,7 +136,7 @@ open http://localhost:3010/vuelogs
|
||||
| 基础 | `timeout` | 请求超时(秒) |
|
||||
| 基础 | `max_auto_continue` | 自动续写次数 |
|
||||
| 基础 | `max_history_messages` | 历史消息条数上限(建议改用 max_history_tokens) |
|
||||
| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),参考值 120000~140000(tiktoken 与 Claude 实际 tokenizer 有差异,建议观察 UI 日志实际值后调整) |
|
||||
| 基础 | `max_history_tokens` | 历史消息 token 数上限(推荐),代码自动补偿 Cursor 后端开销(1,300 基础 + 工具 tokenizer 差异,动态计算),参考值 130000~170000,默认 150000 |
|
||||
| 功能 | `thinking.enabled` | Thinking 模式(跟随客户端/强制关闭/强制开启) |
|
||||
| 功能 | `sanitize_response` | 响应内容清洗 |
|
||||
| 历史压缩 | `compression.*` | 压缩开关、级别、保留条数等 |
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
<Field label="max_history_messages" desc="按条数裁剪历史(保留工具 few-shot 示例)。注意:条数无法反映实际 token 体积,建议改用下方的 max_history_tokens。-1 不限制">
|
||||
<input v-model.number="draft.max_history_messages" type="number" min="-1" class="inp" />
|
||||
</Field>
|
||||
<Field label="max_history_tokens" desc="按 token 数裁剪历史(推荐)。从最早消息整条删除,有助于减少超出 Cursor 上下文的概率。注意:tiktoken 与 Claude 实际 tokenizer 有差异,低估约 10~20%,默认 130000,参考值 120000~140000,建议观察 UI 日志的实际输入 tokens 后调整。-1 不限制">
|
||||
<Field label="max_history_tokens" desc="按 token 数裁剪历史(推荐)。从最早消息整条删除,有助于减少超出 Cursor 上下文的概率。代码自动补偿 Cursor 后端开销(1,300 基础 + 工具 tokenizer 差异,动态计算),默认 150000,参考值 130000~170000。-1 不限制">
|
||||
<input v-model.number="draft.max_history_tokens" type="number" min="-1" class="inp" />
|
||||
</Field>
|
||||
</Group>
|
||||
|
||||
Reference in New Issue
Block a user