fix: harden OpenAI multimodal compatibility and image handling

Tighten image path normalization, preserve multimodal request content across OpenAI-compatible endpoints, and fail fast on unsupported image_file inputs so clients get predictable behavior instead of silent degradation.

Made-with: Cursor
This commit is contained in:
小海
2026-03-17 15:03:39 +08:00
parent 447dad8c03
commit ed6181a5a9
10 changed files with 693 additions and 47 deletions

View File

@@ -7,10 +7,14 @@
"dev": "tsx watch src/index.ts",
"build": "tsc",
"start": "node dist/index.js",
"test:image-paths": "node test/unit-image-paths.mjs",
"test:openai-image-file": "node test/unit-openai-image-file.mjs",
"test:openai-chat-input": "node test/unit-openai-chat-input.mjs",
"test:vision": "node test/unit-vision.mjs",
"test:unit": "node test/unit-tolerant-parse.mjs",
"test:tool-fixer": "node test/unit-tool-fixer.mjs",
"test:openai-compat": "node test/unit-openai-compat.mjs",
"test:all": "node test/unit-tolerant-parse.mjs && node test/unit-tool-fixer.mjs && node test/unit-openai-compat.mjs && node test/unit-proxy-agent.mjs",
"test:all": "node test/unit-tolerant-parse.mjs && node test/unit-tool-fixer.mjs && node test/unit-openai-compat.mjs && node test/unit-proxy-agent.mjs && node test/unit-image-paths.mjs && node test/unit-vision.mjs && node test/unit-openai-chat-input.mjs && node test/unit-openai-image-file.mjs",
"test:e2e": "node test/e2e-chat.mjs",
"test:agentic": "node test/e2e-agentic.mjs"
},

View File

@@ -957,6 +957,22 @@ function shortId(): string {
return uuidv4().replace(/-/g, '').substring(0, 16);
}
/**
 * Converts a `file:///` URL into a local filesystem path.
 *
 * Inputs that do not start with `file:///` are returned unchanged. The rest of
 * the URL is percent-decoded; if the decoded text begins with a drive letter
 * followed by `/` or `\` it is returned as-is, otherwise a leading `/` is
 * put back in front of it.
 *
 * @param url - Candidate URL or path.
 * @returns The decoded local path, or `url` itself when it is not a `file:///` URL.
 */
function normalizeFileUrlToLocalPath(url: string): string {
  const scheme = 'file:///';
  if (!url.startsWith(scheme)) {
    return url;
  }

  const encodedPath = url.slice(scheme.length);
  let localPath: string;
  try {
    localPath = decodeURIComponent(encodedPath);
  } catch {
    // Malformed percent-encoding: keep the raw, undecoded path.
    localPath = encodedPath;
  }

  const startsWithDriveLetter = /^[A-Za-z]:[\\/]/.test(localPath);
  return startsWithDriveLetter ? localPath : `/${localPath}`;
}
// ==================== 图片预处理 ====================
/**
@@ -991,6 +1007,15 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
console.log(`[Converter] 🔄 归一化 Anthropic URL 图片: source.url → source.data`);
}
// ★ file:// 本地文件 URL → 归一化为系统路径,复用后续本地文件读取逻辑
if (block.source?.type === 'url' && typeof block.source.data === 'string' && block.source.data.startsWith('file:///')) {
block.source.data = normalizeFileUrlToLocalPath(block.source.data);
if (!block.source.media_type) {
block.source.media_type = guessMediaType(block.source.data);
}
console.log(`[Converter] 🔄 修正 file:// URL → 本地路径: ${block.source.data.substring(0, 120)}`);
}
// ★ 兜底source.data 是完整 data: URI 但 type 仍标为 'url'
if (block.source?.type === 'url' && block.source.data?.startsWith('data:')) {
const match = block.source.data.match(/^data:([^;]+);base64,(.+)$/);
@@ -1011,7 +1036,8 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
// B) content 是数组,但 text block 中嵌入了路径
// 支持格式:
// - 本地文件路径: /Users/.../file_362---eb90f5a2.jpg含连字符、UUID
// - file:// URL: file:///Users/.../file.jpg
// - Windows 本地路径: C:\Users\...\file.jpg / C:/Users/.../file.jpg
// - file:// URL: file:///Users/.../file.jpg / file:///C:/Users/.../file.jpg
// - HTTP(S) URL 以图片后缀结尾
//
// 使用 [^\s"')\]] 匹配路径中任意非空白/非引号字符(包括 -、UUID、中文等
@@ -1020,21 +1046,31 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
/** 从文本中提取所有图片 URL/路径 */
function extractImageUrlsFromText(text: string): string[] {
const urls: string[] = [];
// file:// URLs → /path
// file:// URLs → 本地路径
const fileRe = /file:\/\/\/([^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg))/gi;
for (const m of text.matchAll(fileRe)) {
urls.push('/' + m[1]);
const normalizedPath = normalizeFileUrlToLocalPath(`file:///${m[1]}`);
urls.push(normalizedPath);
}
// HTTP(S) URLs
const httpRe = /(https?:\/\/[^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)(?:\?[^\s"')\]]*)?)/gi;
for (const m of text.matchAll(httpRe)) {
if (!urls.includes(m[1])) urls.push(m[1]);
}
// 本地绝对路径 (/开头,支持 UUID、连字符等)
const localRe = /(?:^|[\s"'(\[,:])(\/[^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg))/gi;
// 本地绝对路径Unix /path 或 Windows C:\path / C:/path排除协议相对 URL//example.com/a.jpg
const localRe = /(?:^|[\s"'(\[,:])((?:\/(?!\/)|[A-Za-z]:[\\/])[^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg))/gi;
for (const m of text.matchAll(localRe)) {
const path = m[1].trim();
if (!urls.includes(path)) urls.push(path);
const localPath = m[1].trim();
const fullMatch = m[0];
const matchStart = m.index ?? 0;
const pathOffsetInMatch = fullMatch.lastIndexOf(localPath);
const pathStart = matchStart + Math.max(pathOffsetInMatch, 0);
const beforePath = text.slice(Math.max(0, pathStart - 12), pathStart);
// 避免 file:///C:/foo.jpg 中的 /foo.jpg 被再次当作 Unix 路径提取
if (/file:\/\/\/[A-Za-z]:$/i.test(beforePath)) continue;
if (localPath.startsWith('//')) continue;
if (!urls.includes(localPath)) urls.push(localPath);
}
return [...new Set(urls)];
}
@@ -1129,8 +1165,8 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
if (block.source?.type === 'url' && block.source.data && !block.source.data.startsWith('data:')) {
const imageUrl = block.source.data;
// ★ 本地文件路径检测:/开头 或 ~/ 开头 或 Windows 绝对路径
const isLocalPath = /^(\/|~\/|[A-Za-z]:\\)/.test(imageUrl);
// ★ 本地文件路径检测:/开头 或 ~/ 开头 或 Windows 绝对路径(支持 \ 和 /
const isLocalPath = /^(\/|~\/|[A-Za-z]:[\\/])/.test(imageUrl);
if (isLocalPath) {
localImages++;

View File

@@ -50,6 +50,43 @@ function toolCallId(): string {
return 'call_' + uuidv4().replace(/-/g, '').substring(0, 24);
}
/**
 * Error describing a request problem, carrying the fields needed to build an
 * OpenAI-style error response (`status`, `type`, `code`).
 */
class OpenAIRequestError extends Error {
  /** HTTP status code to respond with. */
  status: number;
  /** Value for the `type` field of the error payload. */
  type: string;
  /** Value for the `code` field of the error payload. */
  code: string;

  /**
   * @param message - Human-readable error description.
   * @param status - HTTP status code; defaults to 400.
   * @param type - Error type; defaults to `invalid_request_error`.
   * @param code - Error code; defaults to `invalid_request`.
   */
  constructor(
    message: string,
    status = 400,
    type = 'invalid_request_error',
    code = 'invalid_request',
  ) {
    super(message);
    this.name = 'OpenAIRequestError';
    this.status = status;
    this.type = type;
    this.code = code;
  }
}
/**
 * Converts an arbitrary value into a string suitable for message content.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned unchanged.
 * - Numbers, booleans and bigints use `String(value)`.
 * - Everything else is JSON-serialized; when serialization throws (e.g. a
 *   circular structure) or produces no text, `String(value)` is used instead.
 *
 * @param value - Value of unknown shape.
 * @returns A string; never `undefined`.
 */
function stringifyUnknownContent(value: unknown): string {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') {
    return String(value);
  }
  try {
    // JSON.stringify yields undefined (not a string) for functions, symbols and
    // objects whose toJSON() returns undefined, despite its declared return type.
    const serialized: string | undefined = JSON.stringify(value);
    if (typeof serialized === 'string') return serialized;
  } catch {
    // Not serializable (circular reference, nested bigint, ...): fall through.
  }
  return String(value);
}
/**
 * Builds the error thrown when a request contains an `image_file` content part.
 *
 * @param fileId - File id taken from the request, if any; included in the message when truthy.
 * @returns An `OpenAIRequestError` with status 400, type `invalid_request_error`
 *          and code `unsupported_content_part`.
 */
function unsupportedImageFileError(fileId?: string): OpenAIRequestError {
  const fileRef = fileId ? ` (file_id: ${fileId})` : '';
  const message = `Unsupported content part: image_file${fileRef}. This proxy does not support OpenAI Files API image references. Please send the image as image_url, input_image, data URI, or a local file path instead.`;
  return new OpenAIRequestError(
    message,
    400,
    'invalid_request_error',
    'unsupported_content_part',
  );
}
// ==================== 请求转换OpenAI → Anthropic ====================
/**
@@ -238,6 +275,7 @@ function toBlocks(content: string | AnthropicContentBlock[]): AnthropicContentBl
/**
* 从 OpenAI 消息中提取文本或多模态内容块
* 处理多种客户端格式:
* - 文本块: { type: 'text'|'input_text', text: '...' }
* - OpenAI 标准: { type: 'image_url', image_url: { url: '...' } }
* - Anthropic 透传: { type: 'image', source: { type: 'url', url: '...' } }
* - 部分客户端: { type: 'input_image', image_url: { url: '...' } }
@@ -248,7 +286,7 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
if (Array.isArray(msg.content)) {
const blocks: AnthropicContentBlock[] = [];
for (const p of msg.content as (OpenAIContentPart | Record<string, unknown>)[]) {
if (p.type === 'text' && (p as OpenAIContentPart).text) {
if ((p.type === 'text' || p.type === 'input_text') && (p as OpenAIContentPart).text) {
blocks.push({ type: 'text', text: (p as OpenAIContentPart).text! });
} else if (p.type === 'image_url' && (p as OpenAIContentPart).image_url?.url) {
const url = (p as OpenAIContentPart).image_url!.url;
@@ -310,11 +348,9 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
});
}
} else if (p.type === 'image_file' && (p as any).image_file) {
// ★ Assistants API 格式: { type: 'image_file', image_file: { file_id: '...', detail?: '...' } }
// file_id 无法直接使用,但记录下来以便调试
const fileId = (p as any).image_file.file_id;
console.log(`[OpenAI] ⚠️ 收到 image_file 格式 (file_id: ${fileId}),此格式需要 Files API 支持`);
blocks.push({ type: 'text', text: `[Image file reference: file_id=${fileId}. This format requires Files API support which is not available.]` });
const fileId = (p as any).image_file.file_id as string | undefined;
console.log(`[OpenAI] ⚠️ 收到不支持的 image_file 格式 (file_id: ${fileId || 'unknown'})`);
throw unsupportedImageFileError(fileId);
} else if ((p.type === 'image_url' || p.type === 'input_image') && (p as any).url) {
// ★ 扁平 URL 格式:某些客户端将 url 直接放在顶层而非 image_url.url
const url = (p as any).url as string;
@@ -368,7 +404,7 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
}
return blocks.length > 0 ? blocks : '';
}
return String(msg.content);
return stringifyUnknownContent(msg.content);
}
/**
@@ -459,11 +495,14 @@ export async function handleOpenAIChatCompletions(req: Request, res: Response):
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
log.fail(message);
res.status(500).json({
const status = err instanceof OpenAIRequestError ? err.status : 500;
const type = err instanceof OpenAIRequestError ? err.type : 'server_error';
const code = err instanceof OpenAIRequestError ? err.code : 'internal_error';
res.status(status).json({
error: {
message,
type: 'server_error',
code: 'internal_error',
type,
code,
},
});
}
@@ -1157,8 +1196,11 @@ export async function handleOpenAIResponses(req: Request, res: Response): Promis
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
console.error(`[OpenAI] /v1/responses 处理失败:`, message);
res.status(500).json({
error: { message, type: 'server_error', code: 'internal_error' },
const status = err instanceof OpenAIRequestError ? err.status : 500;
const type = err instanceof OpenAIRequestError ? err.type : 'server_error';
const code = err instanceof OpenAIRequestError ? err.code : 'internal_error';
res.status(status).json({
error: { message, type, code },
});
}
}
@@ -1628,26 +1670,29 @@ export function responsesToChatCompletions(body: Record<string, unknown>): OpenA
if (item.type === 'function_call_output') {
messages.push({
role: 'tool',
content: (item.output as string) || '',
content: stringifyUnknownContent(item.output),
tool_call_id: (item.call_id as string) || '',
});
continue;
}
const role = (item.role as string) || 'user';
if (role === 'system' || role === 'developer') {
const text = typeof item.content === 'string'
? item.content
: Array.isArray(item.content)
? (item.content as Array<Record<string, unknown>>).filter(b => b.type === 'input_text').map(b => b.text as string).join('\n')
: String(item.content || '');
const text = extractOpenAIContent({
role: 'system',
content: (item.content as string | OpenAIContentPart[] | null) ?? null,
} as OpenAIMessage);
messages.push({ role: 'system', content: text });
} else if (role === 'user') {
const content = typeof item.content === 'string'
? item.content
: Array.isArray(item.content)
? (item.content as Array<Record<string, unknown>>).filter(b => b.type === 'input_text').map(b => b.text as string).join('\n')
: String(item.content || '');
messages.push({ role: 'user', content });
const rawContent = (item.content as string | OpenAIContentPart[] | null) ?? null;
const normalizedContent = typeof rawContent === 'string'
? rawContent
: Array.isArray(rawContent) && rawContent.every(b => b.type === 'input_text')
? rawContent.map(b => b.text || '').join('\n')
: rawContent;
messages.push({
role: 'user',
content: normalizedContent || '',
});
} else if (role === 'assistant') {
const blocks = Array.isArray(item.content) ? item.content as Array<Record<string, unknown>> : [];
const text = blocks.filter(b => b.type === 'output_text').map(b => b.text as string).join('\n');

View File

@@ -31,9 +31,10 @@ export interface OpenAIMessage {
}
export interface OpenAIContentPart {
type: 'text' | 'image_url' | 'image' | 'input_image';
type: 'text' | 'input_text' | 'image_url' | 'image' | 'input_image' | 'image_file';
text?: string;
image_url?: { url: string; detail?: string };
image_file?: { file_id: string; detail?: string };
// Anthropic-style image source (when type === 'image')
source?: { type: string; media_type?: string; data?: string; url?: string };
}

View File

@@ -8,6 +8,7 @@ export async function applyVisionInterceptor(messages: AnthropicMessage[]): Prom
if (!config.vision?.enabled) return;
for (const msg of messages) {
if (msg.role !== 'user') continue;
if (!Array.isArray(msg.content)) continue;
let hasImages = false;

141
test/unit-image-paths.mjs Normal file
View File

@@ -0,0 +1,141 @@
/**
* test/unit-image-paths.mjs
*
* 单元测试:图片路径提取与本地路径识别
* 运行方式：node test/unit-image-paths.mjs
*/
// Running tallies of test outcomes; read by the summary printed at the end of the script.
let passed = 0;
let failed = 0;

/**
 * Runs one synchronous test case and records the outcome.
 *
 * Prints the test name on success; on failure prints the name and the thrown
 * error's message to stderr. Updates the `passed` / `failed` counters.
 *
 * @param name - Label printed for the test.
 * @param fn - Test body; throwing marks the test as failed.
 */
function test(name, fn) {
  try {
    fn();
    console.log(`${name}`);
    passed += 1;
  } catch (err) {
    console.error(`${name}`);
    console.error(` ${err.message}`);
    failed += 1;
  }
}
/**
 * Throws when `condition` is falsy.
 *
 * @param condition - Value expected to be truthy.
 * @param msg - Optional error message; defaults to 'Assertion failed'.
 */
function assert(condition, msg) {
  if (condition) return;
  throw new Error(msg || 'Assertion failed');
}
/**
 * Asserts that two values have identical JSON serializations.
 *
 * @param a - Actual value.
 * @param b - Expected value.
 * @param msg - Optional error message overriding the default "Expected ..., got ..." text.
 */
function assertEqual(a, b, msg) {
  const actualJson = JSON.stringify(a);
  const expectedJson = JSON.stringify(b);
  if (actualJson === expectedJson) return;
  throw new Error(msg || `Expected ${expectedJson}, got ${actualJson}`);
}
/**
 * Converts a `file:///` URL into a local filesystem path.
 *
 * Inputs without the `file:///` prefix are returned unchanged. The remainder
 * is percent-decoded; a decoded path that begins with a drive letter followed
 * by `/` or `\` is returned as-is, anything else gets its leading `/` restored.
 *
 * @param url - Candidate URL or path.
 * @returns The decoded local path, or `url` itself when it is not a `file:///` URL.
 */
function normalizeFileUrlToLocalPath(url) {
  const scheme = 'file:///';
  if (!url.startsWith(scheme)) {
    return url;
  }

  const encodedPath = url.slice(scheme.length);
  let localPath;
  try {
    localPath = decodeURIComponent(encodedPath);
  } catch {
    // Malformed percent-encoding: keep the raw, undecoded path.
    localPath = encodedPath;
  }

  const startsWithDriveLetter = /^[A-Za-z]:[\\/]/.test(localPath);
  return startsWithDriveLetter ? localPath : `/${localPath}`;
}
/**
 * Collects image references found in free text, in order of first discovery
 * and without duplicates.
 *
 * Three passes are made over the text:
 *  1. `file:///` URLs, converted with `normalizeFileUrlToLocalPath`;
 *  2. http(s) URLs ending in an image extension (optional query string kept);
 *  3. absolute local paths: Unix `/path` or Windows `C:\path` / `C:/path`.
 *
 * @param text - Text to scan.
 * @returns Array of unique paths / URLs.
 */
function extractImageUrlsFromText(text) {
  // A Set keeps first-insertion order, which matches append-if-missing on an array.
  const found = new Set();

  const fileUrlPattern = /file:\/\/\/([^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg))/gi;
  for (const match of text.matchAll(fileUrlPattern)) {
    found.add(normalizeFileUrlToLocalPath(`file:///${match[1]}`));
  }

  const httpUrlPattern = /(https?:\/\/[^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)(?:\?[^\s"')\]]*)?)/gi;
  for (const match of text.matchAll(httpUrlPattern)) {
    found.add(match[1]);
  }

  const localPathPattern = /(?:^|[\s"'(\[,:])((?:\/(?!\/)|[A-Za-z]:[\\/])[^\s"')\]]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg))/gi;
  for (const match of text.matchAll(localPathPattern)) {
    const candidate = match[1].trim();
    // The captured path is the tail of the whole match, so its offset inside
    // the match equals the length of the delimiter that precedes it.
    const offsetInMatch = match[0].lastIndexOf(candidate);
    const start = (match.index ?? 0) + Math.max(offsetInMatch, 0);
    const preceding = text.slice(Math.max(0, start - 12), start);

    // Skip the "/foo.jpg" tail of "file:///C:/foo.jpg"; the first pass already handled that URL.
    const isTailOfWindowsFileUrl = /file:\/\/\/[A-Za-z]:$/i.test(preceding);
    if (isTailOfWindowsFileUrl || candidate.startsWith('//')) {
      continue;
    }
    found.add(candidate);
  }

  return [...found];
}
/**
 * Reports whether a string looks like a local filesystem path: it starts with
 * `/`, with `~/`, or with a drive letter followed by `\` or `/`.
 *
 * @param imageUrl - Path or URL to classify.
 * @returns `true` when the string matches one of the local-path prefixes.
 */
function isLocalPath(imageUrl) {
  const localPrefix = /^(\/|~\/|[A-Za-z]:[\\/])/;
  return localPrefix.test(imageUrl);
}
// Section 1: protocol-relative URLs must not be extracted as local paths.
console.log('\n📦 [1] 协议相对 URL 排除\n');
test('不提取 //example.com/image.jpg', () => {
const text = 'look //example.com/image.jpg and https://example.com/real.jpg';
const urls = extractImageUrlsFromText(text);
assertEqual(urls, ['https://example.com/real.jpg']);
});
// Section 2: file:// URLs are normalized to local paths (Windows drive paths keep no leading slash).
console.log('\n📦 [2] file:// Windows 路径归一化\n');
test('file:///C:/Users/name/a.jpg → C:/Users/name/a.jpg', () => {
const text = 'please inspect file:///C:/Users/name/a.jpg';
const urls = extractImageUrlsFromText(text);
assertEqual(urls, ['C:/Users/name/a.jpg']);
});
test('file:///Users/name/a.jpg → /Users/name/a.jpg', () => {
const text = 'please inspect file:///Users/name/a.jpg';
const urls = extractImageUrlsFromText(text);
assertEqual(urls, ['/Users/name/a.jpg']);
});
// Calls the normalizer directly, without going through text extraction.
test('直接 image block 的 file:// URL 也能归一化', () => {
assertEqual(
normalizeFileUrlToLocalPath('file:///C:/Users/name/a.jpg'),
'C:/Users/name/a.jpg'
);
assertEqual(
normalizeFileUrlToLocalPath('file:///Users/name/a.jpg'),
'/Users/name/a.jpg'
);
});
// Section 3: plain Windows paths (backslash and forward-slash forms) are extracted and classified as local.
console.log('\n📦 [3] Windows 本地路径识别\n');
test('提取 C:\\Users\\name\\a.jpg', () => {
const text = '看看这张图 C:\\Users\\name\\a.jpg';
const urls = extractImageUrlsFromText(text);
assertEqual(urls, ['C:\\Users\\name\\a.jpg']);
});
test('提取 C:/Users/name/a.jpg', () => {
const text = '看看这张图 C:/Users/name/a.jpg';
const urls = extractImageUrlsFromText(text);
assertEqual(urls, ['C:/Users/name/a.jpg']);
});
test('Windows 路径被视为本地文件', () => {
assert(isLocalPath('C:\\Users\\name\\a.jpg'), 'backslash path should be local');
assert(isLocalPath('C:/Users/name/a.jpg'), 'slash path should be local');
assert(isLocalPath(normalizeFileUrlToLocalPath('file:///C:/Users/name/a.jpg')), 'normalized file URL should be local');
assert(isLocalPath(normalizeFileUrlToLocalPath('file:///Users/name/a.jpg')), 'normalized unix file URL should be local');
});
// Summary: print the pass/fail totals and exit with status 1 if any test failed.
console.log('\n' + '═'.repeat(55));
console.log(` 结果: ${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
console.log('═'.repeat(55) + '\n');
if (failed > 0) process.exit(1);

View File

@@ -0,0 +1,143 @@
/**
* test/unit-openai-chat-input.mjs
*
* 单元测试:/v1/chat/completions 输入内容块兼容性
* 运行方式：node test/unit-openai-chat-input.mjs
*/
// Running tallies of test outcomes; read by the summary printed at the end of the script.
let passed = 0;
let failed = 0;

/**
 * Runs one synchronous test case and records the outcome.
 *
 * Prints the test name on success; on failure prints the name and the thrown
 * error's message to stderr. Updates the `passed` / `failed` counters.
 *
 * @param name - Label printed for the test.
 * @param fn - Test body; throwing marks the test as failed.
 */
function test(name, fn) {
  try {
    fn();
    console.log(`${name}`);
    passed += 1;
  } catch (err) {
    console.error(`${name}`);
    console.error(` ${err.message}`);
    failed += 1;
  }
}
/**
 * Throws when `condition` is falsy.
 *
 * @param condition - Value expected to be truthy.
 * @param msg - Optional error message; defaults to 'Assertion failed'.
 */
function assert(condition, msg) {
  if (condition) return;
  throw new Error(msg || 'Assertion failed');
}
/**
 * Asserts that two values have identical JSON serializations.
 *
 * @param a - Actual value.
 * @param b - Expected value.
 * @param msg - Optional error message overriding the default "Expected ..., got ..." text.
 */
function assertEqual(a, b, msg) {
  const actualJson = JSON.stringify(a);
  const expectedJson = JSON.stringify(b);
  if (actualJson === expectedJson) return;
  throw new Error(msg || `Expected ${expectedJson}, got ${actualJson}`);
}
/**
 * Converts an OpenAI-style message's content into a string or a list of
 * Anthropic-style blocks.
 *
 * - `null` / `undefined` content → `''`.
 * - String content → returned unchanged.
 * - Array content → `text` / `input_text` parts with non-empty text become
 *   text blocks; `image_url` / `input_image` parts carrying `image_url.url`
 *   become URL image blocks; other parts are dropped. An empty result yields `''`.
 * - Anything else → `String(content)`.
 *
 * @param msg - Message object with a `content` field.
 * @returns A string, or a non-empty array of blocks.
 */
function extractOpenAIContentBlocks(msg) {
  const content = msg.content;
  if (content === null || content === undefined) return '';
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return String(content);

  const blocks = [];
  for (const part of content) {
    const isTextPart = part.type === 'text' || part.type === 'input_text';
    if (isTextPart && part.text) {
      blocks.push({ type: 'text', text: part.text });
      continue;
    }

    const isImagePart = part.type === 'image_url' || part.type === 'input_image';
    if (isImagePart && part.image_url?.url) {
      blocks.push({
        type: 'image',
        source: { type: 'url', media_type: 'image/jpeg', data: part.image_url.url },
      });
    }
  }
  return blocks.length > 0 ? blocks : '';
}
/**
 * Returns only the textual content of a message.
 *
 * String results from `extractOpenAIContentBlocks` are passed through; block
 * arrays are reduced to their text blocks, joined with newlines.
 *
 * @param msg - Message object with a `content` field.
 * @returns The message text.
 */
function extractOpenAIContent(msg) {
  const extracted = extractOpenAIContentBlocks(msg);
  if (typeof extracted === 'string') {
    return extracted;
  }

  const texts = [];
  for (const block of extracted) {
    if (block.type === 'text') texts.push(block.text);
  }
  return texts.join('\n');
}
/**
 * Reduced request converter used by these tests: folds `system` messages into
 * one prompt (separated by blank lines) and maps `user` messages to
 * Anthropic-style messages. Messages with any other role are ignored.
 *
 * @param body - Chat-completions style request with a `messages` array.
 * @returns `{ system, messages }`; `system` stays `undefined` without system messages.
 */
function convertToAnthropicRequest(body) {
  const messages = [];
  let system;

  for (const msg of body.messages) {
    if (msg.role === 'system') {
      const prefix = system ? system + '\n\n' : '';
      system = prefix + extractOpenAIContent(msg);
    } else if (msg.role === 'user') {
      const extracted = extractOpenAIContentBlocks(msg);
      const content = Array.isArray(extracted) ? extracted : (extracted || '');
      messages.push({ role: 'user', content });
    }
  }

  return { system, messages };
}
// Section 1: chat.completions requests must accept `input_text` parts alongside classic `text` parts.
console.log('\n📦 [1] chat.completions input_text 兼容\n');
// A user message mixing input_text and input_image keeps both parts, in order.
test('user input_text 不应丢失', () => {
const req = convertToAnthropicRequest({
model: 'gpt-4.1',
messages: [{
role: 'user',
content: [
{ type: 'input_text', text: '请描述这张图' },
{ type: 'input_image', image_url: { url: 'https://example.com/a.jpg' } },
],
}],
});
assertEqual(req.messages.length, 1);
assert(Array.isArray(req.messages[0].content), 'content should be block array');
assertEqual(req.messages[0].content[0], { type: 'text', text: '请描述这张图' });
assertEqual(req.messages[0].content[1].type, 'image');
});
// Multiple input_text parts in a system message are joined with a newline into the system prompt.
test('system input_text 应拼接进 system prompt', () => {
const req = convertToAnthropicRequest({
model: 'gpt-4.1',
messages: [{
role: 'system',
content: [
{ type: 'input_text', text: '你是一个严谨的助手。' },
{ type: 'input_text', text: '请直接回答。' },
],
}, {
role: 'user',
content: 'hi',
}],
});
assertEqual(req.system, '你是一个严谨的助手。\n请直接回答。');
});
// Regression guard: the original `text` part type still works.
test('传统 text 块仍然兼容', () => {
const req = convertToAnthropicRequest({
model: 'gpt-4.1',
messages: [{
role: 'user',
content: [{ type: 'text', text: 'hello' }],
}],
});
assertEqual(req.messages[0].content[0], { type: 'text', text: 'hello' });
});
// Summary: print the pass/fail totals and exit with status 1 if any test failed.
console.log('\n' + '═'.repeat(55));
console.log(` 结果: ${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
console.log('═'.repeat(55) + '\n');
if (failed > 0) process.exit(1);

View File

@@ -34,6 +34,50 @@ function assertEqual(a, b, msg) {
if (as !== bs) throw new Error(msg || `Expected ${bs}, got ${as}`);
}
/**
 * Converts an arbitrary value into a string suitable for message content.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned unchanged.
 * - Numbers, booleans and bigints use `String(value)`.
 * - Everything else is JSON-serialized; when serialization throws (e.g. a
 *   circular structure) or produces no text, `String(value)` is used instead.
 *
 * @param value - Value of unknown shape.
 * @returns A string; never `undefined`.
 */
function stringifyUnknownContent(value) {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') {
    return String(value);
  }
  try {
    // JSON.stringify yields undefined (not a string) for functions, symbols and
    // objects whose toJSON() returns undefined.
    const serialized = JSON.stringify(value);
    if (typeof serialized === 'string') return serialized;
  } catch {
    // Not serializable (circular reference, nested bigint, ...): fall through.
  }
  return String(value);
}
/**
 * Converts an OpenAI-style message's content into a string or a list of
 * Anthropic-style blocks.
 *
 * - `null` / `undefined` content → `''`.
 * - String content → returned unchanged.
 * - Array content → `text` / `input_text` parts with non-empty text become
 *   text blocks; `image_url` / `input_image` parts carrying `image_url.url`
 *   become URL image blocks; other parts are dropped. An empty result yields `''`.
 * - Anything else → `stringifyUnknownContent(content)`.
 *
 * @param msg - Message object with a `content` field.
 * @returns A string, or a non-empty array of blocks.
 */
function extractOpenAIContentBlocks(msg) {
  const content = msg.content;
  if (content === null || content === undefined) return '';
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return stringifyUnknownContent(content);

  const blocks = [];
  for (const part of content) {
    const isTextPart = part.type === 'text' || part.type === 'input_text';
    if (isTextPart && part.text) {
      blocks.push({ type: 'text', text: part.text });
      continue;
    }

    const isImagePart = part.type === 'image_url' || part.type === 'input_image';
    if (isImagePart && part.image_url?.url) {
      blocks.push({
        type: 'image',
        source: { type: 'url', media_type: 'image/jpeg', data: part.image_url.url },
      });
    }
  }
  return blocks.length > 0 ? blocks : '';
}
/**
 * Returns only the textual content of a message.
 *
 * String results from `extractOpenAIContentBlocks` are passed through; block
 * arrays are reduced to their text blocks, joined with newlines.
 *
 * @param msg - Message object with a `content` field.
 * @returns The message text.
 */
function extractOpenAIContent(msg) {
  const extracted = extractOpenAIContentBlocks(msg);
  if (typeof extracted === 'string') {
    return extracted;
  }

  const texts = [];
  for (const block of extracted) {
    if (block.type === 'text') texts.push(block.text);
  }
  return texts.join('\n');
}
// ─── 内联 mergeConsecutiveRoles与 src/openai-handler.ts 保持同步)────
function toBlocks(content) {
if (typeof content === 'string') {
@@ -75,26 +119,29 @@ function responsesToChatCompletions(body) {
if (item.type === 'function_call_output') {
messages.push({
role: 'tool',
content: item.output || '',
content: stringifyUnknownContent(item.output),
tool_call_id: item.call_id || '',
});
continue;
}
const role = item.role || 'user';
if (role === 'system' || role === 'developer') {
const text = typeof item.content === 'string'
? item.content
: Array.isArray(item.content)
? item.content.filter(b => b.type === 'input_text').map(b => b.text).join('\n')
: String(item.content || '');
const text = extractOpenAIContent({
role: 'system',
content: item.content ?? null,
});
messages.push({ role: 'system', content: text });
} else if (role === 'user') {
const content = typeof item.content === 'string'
? item.content
: Array.isArray(item.content)
? item.content.filter(b => b.type === 'input_text').map(b => b.text).join('\n')
: String(item.content || '');
messages.push({ role: 'user', content });
const rawContent = item.content ?? null;
const normalizedContent = typeof rawContent === 'string'
? rawContent
: Array.isArray(rawContent) && rawContent.every(b => b.type === 'input_text')
? rawContent.map(b => b.text || '').join('\n')
: rawContent;
messages.push({
role: 'user',
content: normalizedContent,
});
} else if (role === 'assistant') {
const blocks = Array.isArray(item.content) ? item.content : [];
const text = blocks.filter(b => b.type === 'output_text').map(b => b.text).join('\n');
@@ -220,6 +267,24 @@ test('function_call_output → tool 消息', () => {
assertEqual(result.messages[2].tool_call_id, 'call_123');
});
// A function_call_output whose `output` is an object must reach the tool message as a JSON string.
test('function_call_output 对象 → JSON 字符串', () => {
const result = responsesToChatCompletions({
model: 'gpt-4',
input: [
{ role: 'user', content: 'Summarize tool output' },
{
type: 'function_call_output',
call_id: 'call_obj',
output: { files: ['a.ts', 'b.ts'], count: 2 }
},
],
});
assertEqual(result.messages.length, 2);
assertEqual(result.messages[1].role, 'tool');
assertEqual(result.messages[1].content, '{"files":["a.ts","b.ts"],"count":2}');
assertEqual(result.messages[1].tool_call_id, 'call_obj');
});
test('助手消息带 function_call → tool_calls', () => {
const result = responsesToChatCompletions({
model: 'gpt-4',
@@ -277,6 +342,25 @@ test('input_text content 数组', () => {
assertEqual(result.messages[0].content, 'Part 1\nPart 2');
});
// Mixed text + image user content must stay an array of the original parts rather than being flattened to text.
test('Responses user input_image 不应丢失', () => {
const result = responsesToChatCompletions({
model: 'gpt-4',
input: [
{
role: 'user',
content: [
{ type: 'input_text', text: '请描述这张图' },
{ type: 'input_image', image_url: { url: 'https://example.com/image.jpg' } },
]
},
],
});
assertEqual(result.messages.length, 1);
assert(Array.isArray(result.messages[0].content), 'content should remain multimodal blocks');
assertEqual(result.messages[0].content[0], { type: 'input_text', text: '请描述这张图' });
assertEqual(result.messages[0].content[1], { type: 'input_image', image_url: { url: 'https://example.com/image.jpg' } });
});
test('stream 默认为 true', () => {
const result = responsesToChatCompletions({ model: 'gpt-4', input: 'hi' });
assertEqual(result.stream, true);

View File

@@ -0,0 +1,101 @@
/**
* test/unit-openai-image-file.mjs
*
* 单元测试：image_file 输入应显式报错,而不是静默降级
* 运行方式：node test/unit-openai-image-file.mjs
*/
// Running tallies of test outcomes; read by the summary printed at the end of the script.
let passed = 0;
let failed = 0;

/**
 * Runs one synchronous test case and records the outcome.
 *
 * Prints the test name on success; on failure prints the name and the thrown
 * error's message to stderr. Updates the `passed` / `failed` counters.
 *
 * @param name - Label printed for the test.
 * @param fn - Test body; throwing marks the test as failed.
 */
function test(name, fn) {
  try {
    fn();
    console.log(`${name}`);
    passed += 1;
  } catch (err) {
    console.error(`${name}`);
    console.error(` ${err.message}`);
    failed += 1;
  }
}
/**
 * Throws when `condition` is falsy.
 *
 * @param condition - Value expected to be truthy.
 * @param msg - Optional error message; defaults to 'Assertion failed'.
 */
function assert(condition, msg) {
  if (condition) return;
  throw new Error(msg || 'Assertion failed');
}
/**
 * Error describing a request problem, carrying the fields needed to build an
 * OpenAI-style error response (`status`, `type`, `code`).
 */
class OpenAIRequestError extends Error {
  /**
   * @param message - Human-readable error description.
   * @param status - HTTP status code; defaults to 400.
   * @param type - Error type; defaults to `invalid_request_error`.
   * @param code - Error code; defaults to `invalid_request`.
   */
  constructor(message, status = 400, type = 'invalid_request_error', code = 'invalid_request') {
    super(message);
    // Properties are assigned in the order name, status, type, code.
    Object.assign(this, { name: 'OpenAIRequestError', status, type, code });
  }
}
/**
 * Builds the error thrown when a request contains an `image_file` content part.
 *
 * @param fileId - File id taken from the request, if any; included in the message when truthy.
 * @returns An `OpenAIRequestError` with status 400, type `invalid_request_error`
 *          and code `unsupported_content_part`.
 */
function unsupportedImageFileError(fileId) {
  const fileRef = fileId ? ` (file_id: ${fileId})` : '';
  const message = `Unsupported content part: image_file${fileRef}. This proxy does not support OpenAI Files API image references. Please send the image as image_url, input_image, data URI, or a local file path instead.`;
  return new OpenAIRequestError(
    message,
    400,
    'invalid_request_error',
    'unsupported_content_part'
  );
}
/**
 * Reduced content extractor used by these tests.
 *
 * - `null` / `undefined` content → `''`; string content → unchanged.
 * - Array content → `text` / `input_text` parts with non-empty text become
 *   text blocks; an `image_file` part carrying an `image_file` object throws
 *   the error built by `unsupportedImageFileError`; other parts are ignored.
 *   An empty result yields `''`.
 * - Anything else → `String(content)`.
 *
 * @param msg - Message object with a `content` field.
 * @returns A string, or a non-empty array of text blocks.
 */
function extractOpenAIContentBlocks(msg) {
  const content = msg.content;
  if (content === null || content === undefined) return '';
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return String(content);

  const blocks = [];
  for (const part of content) {
    switch (part.type) {
      case 'text':
      case 'input_text':
        if (part.text) blocks.push({ type: 'text', text: part.text });
        break;
      case 'image_file':
        // Fail fast instead of silently dropping the image reference.
        if (part.image_file) throw unsupportedImageFileError(part.image_file.file_id);
        break;
      default:
        break;
    }
  }
  return blocks.length > 0 ? blocks : '';
}
// Section 1: image_file parts must raise an explicit request error.
console.log('\n📦 [1] image_file 显式报错\n');
// Captures the thrown error and checks its class, message contents, status, type and code.
test('image_file 应抛出 OpenAIRequestError', () => {
let thrown;
try {
extractOpenAIContentBlocks({
role: 'user',
content: [
{ type: 'input_text', text: '请描述图片' },
{ type: 'image_file', image_file: { file_id: 'file_123' } },
],
});
} catch (e) {
thrown = e;
}
assert(thrown instanceof OpenAIRequestError, 'should throw OpenAIRequestError');
assert(thrown.message.includes('image_file'), 'message should mention image_file');
assert(thrown.message.includes('file_123'), 'message should include file_id');
assert(thrown.status === 400, 'status should be 400');
assert(thrown.type === 'invalid_request_error', 'type should be invalid_request_error');
assert(thrown.code === 'unsupported_content_part', 'code should be unsupported_content_part');
});
// Control case: content without image_file is converted normally.
test('普通文本块仍可正常通过', () => {
const blocks = extractOpenAIContentBlocks({
role: 'user',
content: [{ type: 'input_text', text: 'hello' }],
});
assert(Array.isArray(blocks), 'blocks should be array');
assert(blocks[0].text === 'hello', 'text block should remain intact');
});
// Summary: print the pass/fail totals and exit with status 1 if any test failed.
console.log('\n' + '═'.repeat(55));
console.log(` 结果: ${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
console.log('═'.repeat(55) + '\n');
if (failed > 0) process.exit(1);

90
test/unit-vision.mjs Normal file
View File

@@ -0,0 +1,90 @@
/**
* test/unit-vision.mjs
*
* 单元测试：Vision 拦截器仅处理 user 图片消息
* 运行方式：node test/unit-vision.mjs
*/
// Running tallies of test outcomes; read by the summary printed at the end of the script.
let passed = 0;
let failed = 0;

/**
 * Runs one test case, awaiting its body, and records the outcome.
 *
 * Prints the test name on success; on failure (a throw or a rejected promise)
 * prints the name and the error's message to stderr. Updates the
 * `passed` / `failed` counters.
 *
 * @param name - Label printed for the test.
 * @param fn - Test body; may be async.
 */
async function test(name, fn) {
  try {
    await fn();
    console.log(`${name}`);
    passed += 1;
  } catch (err) {
    console.error(`${name}`);
    console.error(` ${err.message}`);
    failed += 1;
  }
}
/**
 * Throws when `condition` is falsy.
 *
 * @param condition - Value expected to be truthy.
 * @param msg - Optional error message; defaults to 'Assertion failed'.
 */
function assert(condition, msg) {
  if (condition) return;
  throw new Error(msg || 'Assertion failed');
}
/**
 * Mock of the vision interceptor used by these tests.
 *
 * For every `user` message whose content is an array and contains at least one
 * `image` block, the image blocks are removed and a single text block with a
 * canned "vision result" is appended after the remaining blocks. All other
 * messages are left untouched. Messages are mutated in place.
 *
 * @param messages - Anthropic-style messages (`{ role, content }`).
 * @returns A promise that resolves once all messages have been processed.
 */
async function applyVisionInterceptor(messages) {
  for (const msg of messages) {
    const isUserBlockMessage = msg.role === 'user' && Array.isArray(msg.content);
    if (!isUserBlockMessage) continue;

    const images = msg.content.filter(block => block.type === 'image');
    if (images.length === 0) continue;

    const remaining = msg.content.filter(block => block.type !== 'image');
    msg.content = [
      ...remaining,
      {
        type: 'text',
        text: `[System: The user attached ${images.length} image(s). Visual analysis/OCR extracted the following context:\nmock vision result]`,
      },
    ];
  }
}
// Section 1: the interceptor only rewrites images in user messages.
console.log('\n📦 [1] Vision 角色范围\n');
// The assistant message keeps its image block; the user message has its image replaced by vision text.
await test('仅处理 user 消息中的图片', async () => {
const messages = [
{
role: 'assistant',
content: [
{ type: 'text', text: 'assistant says hi' },
{ type: 'image', source: { type: 'url', data: 'https://example.com/a.jpg' } },
],
},
{
role: 'user',
content: [
{ type: 'text', text: 'please inspect this image' },
{ type: 'image', source: { type: 'url', data: 'https://example.com/b.jpg' } },
],
},
];
await applyVisionInterceptor(messages);
assert(messages[0].content.some(block => block.type === 'image'), 'assistant image should remain untouched');
assert(messages[1].content.every(block => block.type !== 'image'), 'user images should be converted away');
assert(messages[1].content.some(block => block.type === 'text' && block.text.includes('mock vision result')), 'user message should receive vision text');
});
// String content (not an array) must pass through unchanged.
await test('忽略非数组内容的 user 消息', async () => {
const messages = [{ role: 'user', content: 'plain text only' }];
await applyVisionInterceptor(messages);
assert(messages[0].content === 'plain text only', 'plain text content should stay unchanged');
});
// Summary: print the pass/fail totals and exit with status 1 if any test failed.
console.log('\n' + '═'.repeat(55));
console.log(` 结果: ${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
console.log('═'.repeat(55) + '\n');
if (failed > 0) process.exit(1);