mirror of
https://github.com/7836246/cursor2api.git
synced 2026-06-01 19:39:47 +08:00
fix: 修复截断问题 - Schema压缩+JSON感知解析器+续写机制重写
三个关键修复:

1. Schema 压缩(converter.ts)
   - 新增 compactSchema() 将完整 JSON Schema 压缩为紧凑类型签名
   - 90 工具的 Schema 从 ~135k chars 降至 ~15k chars
   - 工具描述截断至 200 chars
   - 直接增大 Cursor API 输出预算(输入越小→输出越大)

2. JSON-string-aware 解析器(converter.ts)
   - 替换 parseToolCalls 的 lazy regex 为手动扫描器
   - 正确跳过 JSON 字符串内部的 ``` 标记
   - 修复 Write/Edit 工具 content 含 markdown 代码块时被提前截断的 bug
   - 新增截断代码块恢复(无闭合 ``` 时仍可解析工具调用)

3. 续写机制重写(handler.ts)
   - 续写请求增加 user 引导消息(解决模型返回空响应的问题)
   - 每次基于原始消息快照重建(防止上下文膨胀)
   - 提取最后 300 chars 作为续写锚点
   - 空响应时立即停止,避免无效循环
   - MAX_AUTO_CONTINUE 从 4 提升至 6
This commit is contained in:
148
src/converter.ts
148
src/converter.ts
@@ -25,6 +25,42 @@ import { fixToolCallArguments } from './tool-fixer.js';
|
||||
|
||||
// ==================== 工具指令构建 ====================
|
||||
|
||||
/**
 * Compress a JSON Schema object into a compact, TypeScript-like signature.
 *
 * Purpose: the full JSON Schemas of ~90 tools add up to roughly 135,000 chars,
 * while the compact form is roughly 15,000 chars. A smaller prompt leaves the
 * upstream API more room for output.
 *
 * Example:
 *   full:    {"type":"object","properties":{"file_path":{"type":"string","description":"..."},"encoding":{"type":"string","enum":["utf-8","base64"]}},"required":["file_path"]}
 *   compact: {file_path!: string, encoding?: utf-8|base64}
 *
 * Rendering rules:
 *   - `name!` marks a required property, `name?` an optional one.
 *   - `enum` values are listed verbatim, joined with `|` (they matter for
 *     generating valid arguments).
 *   - A union `type` array such as ["string","null"] is rendered `string|null`.
 *   - Arrays are rendered `<item>[]`; the item schema is compacted with the
 *     same rules, so arrays of objects/enums keep their structure.
 *   - Objects with `properties` are compacted recursively.
 *   - Anything without usable type information is rendered `any`.
 *
 * @param schema JSON Schema object (normally with `properties` / `required`).
 * @returns The compact signature, or `'{}'` when there are no properties.
 */
function compactSchema(schema: Record<string, unknown>): string {
    const properties = schema?.properties;
    if (!properties || typeof properties !== 'object') return '{}';

    // Tolerate a malformed `required` (anything that is not an array is ignored).
    const required = new Set<unknown>(Array.isArray(schema.required) ? schema.required : []);

    // Render the type of a single property (or array item) schema.
    const describe = (node: unknown): string => {
        // Boolean schemas, null and other non-object values carry no type info.
        if (!node || typeof node !== 'object') return 'any';
        const prop = node as Record<string, unknown>;

        // Enum values take precedence over the declared type.
        if (Array.isArray(prop.enum) && prop.enum.length > 0) {
            return prop.enum.join('|');
        }

        const rawType = prop.type;
        let type = 'any';
        if (Array.isArray(rawType) && rawType.length > 0) {
            // JSON Schema allows `type` to be a list of alternatives.
            type = rawType.join('|');
        } else if (typeof rawType === 'string' && rawType.length > 0) {
            type = rawType;
        }

        if (type === 'array' && prop.items) {
            const inner = describe(prop.items);
            // Parenthesize unions so `(a|b)[]` is not misread as `a|b[]`.
            const needsParens = inner.includes('|') && !inner.startsWith('{');
            return needsParens ? `(${inner})[]` : `${inner}[]`;
        }

        if (type === 'object' && prop.properties) {
            return compactSchema(prop);
        }

        return type;
    };

    const parts = Object.entries(properties as Record<string, unknown>).map(([name, prop]) => {
        const marker = required.has(name) ? '!' : '?';
        return `${name}${marker}: ${describe(prop)}`;
    });

    return `{${parts.join(', ')}}`;
}
|
||||
|
||||
/**
|
||||
* 将工具定义构建为格式指令
|
||||
* 使用 Cursor IDE 原生场景融合:不覆盖模型身份,而是顺应它在 IDE 内的角色
|
||||
@@ -37,8 +73,11 @@ function buildToolInstructions(
|
||||
if (!tools || tools.length === 0) return '';
|
||||
|
||||
const toolList = tools.map((tool) => {
|
||||
const schema = tool.input_schema ? JSON.stringify(tool.input_schema) : '{}';
|
||||
return `- **${tool.name}**: ${tool.description || 'No description'}\n Schema: ${schema}`;
|
||||
// ★ 使用紧凑 Schema 替代完整 JSON Schema 以大幅减小输入体积
|
||||
const schema = tool.input_schema ? compactSchema(tool.input_schema) : '{}';
|
||||
// 截断过长的工具描述(部分客户端的工具描述可达数千字符)
|
||||
const desc = (tool.description || 'No description').substring(0, 200);
|
||||
return `- **${tool.name}**: ${desc}\n Params: ${schema}`;
|
||||
}).join('\n');
|
||||
|
||||
// ★ tool_choice 强制约束
|
||||
@@ -530,36 +569,107 @@ function tolerantParse(jsonStr: string): any {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse tool calls out of ```json / ```json action fenced blocks.
 *
 * A JSON-string-aware scanner is used instead of a lazy regex such as
 * /```json[\s\S]*?```/g. Write/Edit tool arguments frequently contain markdown
 * code fences inside their `content` string; a lazy regex would close the
 * block at the first ``` inside that string and truncate the arguments.
 *
 * Behavior:
 *   - Each opening fence is scanned forward, tracking JSON string/escape state,
 *     until a ``` outside of any JSON string is found.
 *   - A closed block whose JSON has a `tool` or `name` field becomes a tool call
 *     and is removed from the returned text.
 *   - An unclosed (truncated) block is still parsed on a best-effort basis; if
 *     it yields a tool call, everything from its opening fence to the end of
 *     the text is removed.
 *   - Once a block is accepted as a tool call, scanning resumes after that
 *     block, so fences embedded in the tool's own string arguments are never
 *     re-interpreted as further tool calls (which would also create
 *     overlapping removal ranges).
 *
 * @param responseText Raw model output.
 * @returns The parsed tool calls and the text with those blocks removed (trimmed).
 */
export function parseToolCalls(responseText: string): {
    toolCalls: ParsedToolCall[];
    cleanText: string;
} {
    const toolCalls: ParsedToolCall[] = [];
    const blocksToRemove: Array<{ start: number; end: number }> = [];

    // Parse one block body; on success record the tool call and return its name.
    // Throws whatever tolerantParse throws so callers can choose how to log it.
    const extractToolCall = (jsonContent: string): string | null => {
        const parsed = tolerantParse(jsonContent);
        const name = parsed?.tool || parsed?.name;
        if (!name) return null;
        let args = parsed.parameters || parsed.arguments || parsed.input || {};
        args = fixToolCallArguments(name, args);
        toolCalls.push({ name, arguments: args });
        return name;
    };

    // Locate every ```json (action)? opening fence.
    const openPattern = /```json(?:\s+action)?/g;
    let openMatch: RegExpExecArray | null;

    while ((openMatch = openPattern.exec(responseText)) !== null) {
        const blockStart = openMatch.index;
        const contentStart = blockStart + openMatch[0].length;

        // Scan forward from the start of the content, skipping any ``` that
        // appears inside a JSON string.
        let pos = contentStart;
        let inJsonString = false;
        let escaped = false;
        let closingPos = -1;

        while (pos < responseText.length - 2) {
            const char = responseText[pos];

            if (escaped) {
                // The previous character was a backslash inside a string.
                escaped = false;
                pos++;
                continue;
            }

            if (char === '\\' && inJsonString) {
                escaped = true;
                pos++;
                continue;
            }

            if (char === '"') {
                inJsonString = !inJsonString;
                pos++;
                continue;
            }

            // Only a fence outside of a JSON string closes the block.
            if (!inJsonString && responseText.substring(pos, pos + 3) === '```') {
                closingPos = pos;
                break;
            }

            pos++;
        }

        if (closingPos >= 0) {
            const blockEnd = closingPos + 3;
            const jsonContent = responseText.substring(contentStart, closingPos).trim();
            try {
                if (extractToolCall(jsonContent) !== null) {
                    blocksToRemove.push({ start: blockStart, end: blockEnd });
                    // Resume after this block so fences inside its arguments
                    // are not matched as new openers.
                    openPattern.lastIndex = blockEnd;
                }
            } catch (e) {
                console.error('[Converter] tolerantParse 失败:', e);
            }
        } else {
            // No closing fence: the block was truncated. Try to parse what exists.
            const jsonContent = responseText.substring(contentStart).trim();
            if (jsonContent.length > 10) {
                let recovered = false;
                try {
                    const name = extractToolCall(jsonContent);
                    if (name !== null) {
                        blocksToRemove.push({ start: blockStart, end: responseText.length });
                        console.log(`[Converter] ⚠️ 从截断的代码块中恢复工具调用: ${name}`);
                        recovered = true;
                    }
                } catch {
                    console.log(`[Converter] 截断的代码块无法解析为工具调用`);
                }
                // The recovered block spans to the end of the text; nothing
                // after its opener can be an independent block.
                if (recovered) break;
            }
        }
    }

    // Remove parsed blocks back-to-front so earlier offsets stay valid.
    let cleanText = responseText;
    for (let i = blocksToRemove.length - 1; i >= 0; i--) {
        const block = blocksToRemove[i];
        cleanText = cleanText.substring(0, block.start) + cleanText.substring(block.end);
    }

    return { toolCalls, cleanText: cleanText.trim() };
}
|
||||
|
||||
|
||||
@@ -413,12 +413,18 @@ export function isTruncated(text: string): boolean {
|
||||
// 代码块未闭合
|
||||
const codeBlockOpen = (trimmed.match(/```/g) || []).length % 2 !== 0;
|
||||
if (codeBlockOpen) return true;
|
||||
// 检测 ```json action 块已开始但 JSON 对象未闭合(截断发生在工具调用参数中间)
|
||||
const jsonActionBlocks = trimmed.match(/```json\s+action[\s\S]*?```/g) || [];
|
||||
const jsonActionOpens = (trimmed.match(/```json\s+action/g) || []).length;
|
||||
if (jsonActionOpens > jsonActionBlocks.length) return true;
|
||||
// XML/HTML 标签未闭合 (Cursor 有时在中途截断)
|
||||
const openTags = (trimmed.match(/^<[a-zA-Z]/gm) || []).length;
|
||||
const closeTags = (trimmed.match(/^<\/[a-zA-Z]/gm) || []).length;
|
||||
if (openTags > closeTags + 1) return true;
|
||||
// 以逗号、分号、冒号、开括号结尾(明显未完成)
|
||||
if (/[,;:\[{(]\s*$/.test(trimmed)) return true;
|
||||
// 长响应以反斜杠 + n 结尾(JSON 字符串中间被截断)
|
||||
if (trimmed.length > 2000 && /\\n?\s*$/.test(trimmed) && !trimmed.endsWith('```')) return true;
|
||||
// 短响应且以小写字母结尾(句子被截断的强烈信号)
|
||||
if (trimmed.length < 500 && /[a-z]$/.test(trimmed)) return false; // 短响应不判断
|
||||
return false;
|
||||
@@ -556,21 +562,46 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
|
||||
// 流完成后,处理完整响应
|
||||
// ★ 内部截断续写:如果模型输出过长被截断(常见于写大文件),Proxy 内部分段续写,然后拼接成完整响应
|
||||
// 这样可以确保工具调用(如 Write)不会横跨两次 API 响应而退化为纯文本
|
||||
const MAX_AUTO_CONTINUE = 4;
|
||||
const MAX_AUTO_CONTINUE = 6;
|
||||
let continueCount = 0;
|
||||
|
||||
// 保存原始请求的消息快照(不含续写追加的消息)
|
||||
const originalMessages = [...activeCursorReq.messages];
|
||||
|
||||
while (hasTools && isTruncated(fullResponse) && continueCount < MAX_AUTO_CONTINUE) {
|
||||
continueCount++;
|
||||
const prevLength = fullResponse.length;
|
||||
console.log(`[Handler] ⚠️ 内部检测到截断 (${fullResponse.length} chars),Proxy 将隐式请求无缝续写 (第${continueCount}次)...`);
|
||||
|
||||
// 构造续写请求:让 assistant 成为最后一条消息,模型会隐式无缝衔接
|
||||
// 提取截断点的最后一段文本作为上下文锚点
|
||||
const anchorLength = Math.min(300, fullResponse.length);
|
||||
const anchorText = fullResponse.slice(-anchorLength);
|
||||
|
||||
// 构造续写请求:原始消息 + 截断的 assistant 回复 + user 续写引导
|
||||
// 每次重建而非累积,防止上下文膨胀
|
||||
const continuationPrompt = `Your previous response was cut off mid-output. The last part of your output was:
|
||||
|
||||
\`\`\`
|
||||
...${anchorText}
|
||||
\`\`\`
|
||||
|
||||
Continue EXACTLY from where you stopped. DO NOT repeat any content already generated. DO NOT restart the response. Output ONLY the remaining content, starting immediately from the cut-off point.`;
|
||||
|
||||
activeCursorReq = {
|
||||
...activeCursorReq,
|
||||
messages: [...activeCursorReq.messages, {
|
||||
parts: [{ type: 'text', text: fullResponse }],
|
||||
id: uuidv4(),
|
||||
role: 'assistant',
|
||||
}],
|
||||
messages: [
|
||||
...originalMessages,
|
||||
{
|
||||
parts: [{ type: 'text', text: fullResponse }],
|
||||
id: uuidv4(),
|
||||
role: 'assistant',
|
||||
},
|
||||
{
|
||||
parts: [{ type: 'text', text: continuationPrompt }],
|
||||
id: uuidv4(),
|
||||
role: 'user',
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let continuationResponse = '';
|
||||
@@ -579,7 +610,14 @@ async function handleStream(res: Response, cursorReq: CursorChatRequest, body: A
|
||||
continuationResponse += event.delta;
|
||||
}
|
||||
});
|
||||
|
||||
if (continuationResponse.trim().length === 0) {
|
||||
console.log(`[Handler] ⚠️ 续写返回空响应,停止续写`);
|
||||
break;
|
||||
}
|
||||
|
||||
fullResponse += continuationResponse;
|
||||
console.log(`[Handler] 续写拼接完成: ${prevLength} → ${fullResponse.length} chars (+${continuationResponse.length})`);
|
||||
}
|
||||
|
||||
let stopReason = (hasTools && isTruncated(fullResponse)) ? 'max_tokens' : 'end_turn';
|
||||
|
||||
Reference in New Issue
Block a user