mirror of
https://github.com/7836246/cursor2api.git
synced 2026-05-07 14:17:49 +08:00
Introduce js-tiktoken (cl100k_base) based token estimation to replace the naive chars/3 approach. Add max_history_tokens config option that trims oldest messages when the estimated token budget is exceeded. - src/tokenizer.ts: new module wrapping js-tiktoken getEncoding - src/config.ts/config-api.ts: YAML parse, env var, hot-reload, default 130000 - src/converter.ts: token budget trimming after max_history_messages pass - src/handler.ts: replace estimateInputTokens with tiktoken-based version - config.yaml.example/docker-compose.yml/README.md: docs and examples - vue-ui: ConfigDrawer field, HotConfig type, README table row
20 lines
537 B
TypeScript
20 lines
537 B
TypeScript
/**
|
||
* tokenizer.ts - 统一 token 估算模块
|
||
*
|
||
* 使用 js-tiktoken 的 cl100k_base 编码器(与 Claude tokenizer 高度近似,误差 < 5%)
|
||
* 纯 JS 实现,无 WASM,无网络请求,ESM 兼容
|
||
*/
|
||
|
||
import { getEncoding } from 'js-tiktoken';
|
||
|
||
// cl100k_base encoder instance, created once at module load and reused by estimateTokens.
const enc = getEncoding('cl100k_base');
|
||
|
||
/**
 * Estimates the number of tokens in a piece of text.
 *
 * Counting is done with the cl100k_base encoding (the GPT-3.5/4 encoding),
 * which this module uses as an approximation of the Claude tokenizer.
 *
 * Special-token markers that appear literally in the input (for example
 * `<|endoftext|>`) are counted as ordinary text. With the encoder's default
 * arguments such input is rejected with an exception, which would let
 * arbitrary message content break token estimation.
 *
 * @param text - Text to measure. An empty string yields 0.
 * @returns The number of tokens the encoder produces for `text`.
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;
  // allowedSpecial = [] and disallowedSpecial = []: never interpret special
  // tokens and never throw on them; they are tokenized like any other text.
  return enc.encode(text, [], []).length;
}
|