mirror of
https://github.com/7836246/cursor2api.git
synced 2026-06-04 12:59:41 +08:00
fix: 修复 OpenClaw/Telegram 等客户端图片处理失败问题 (#39)
- 新增本地文件路径图片读取支持 (readFileSync)
- 新增文本消息中嵌入图片 URL/路径自动提取
- 新增 file:// URL 解析支持
- OpenAI handler 新增 image_file、扁平 url、通用兜底等格式兼容
- 修复 OpenClaw {{MediaPath}} 本地路径无法被 fetch() 处理的根因
This commit is contained in:
173
src/converter.ts
173
src/converter.ts
@@ -9,6 +9,9 @@
|
||||
* 5. 图片预处理 → Anthropic ImageBlockParam 检测与 OCR/视觉 API 降级
|
||||
*/
|
||||
|
||||
import { readFileSync, existsSync } from 'fs';
|
||||
import { resolve as pathResolve } from 'path';
|
||||
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import type {
|
||||
AnthropicRequest,
|
||||
@@ -993,10 +996,87 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
// ★ Phase 1.5: 文本中嵌入的图片 URL/路径提取
|
||||
// OpenClaw/Telegram 等客户端可能将图片路径/URL 嵌入到文本消息中
|
||||
// 例如: "The user sent an image" + 路径引用,或 {{MediaUrl}} 模板变量
|
||||
// 常见格式:
|
||||
// - 本地文件路径: /Users/.../file.jpg
|
||||
// - file:// URL: file:///Users/.../file.jpg
|
||||
// - HTTP(S) URL 以图片后缀结尾
|
||||
// Finds image references embedded in plain text. Two alternatives:
//   1. a file:// or http(s):// URL ending in an image extension, with an optional ?query
//   2. an absolute local path (capture group 1) at the start of the text or after whitespace
// The scheme prefix and the path form a single alternative, so a file:// reference matches
// as a whole URL instead of as the bare "file:///" prefix. The lookbehind keeps the
// whitespace before a local path out of match[0], so replacing match[0] preserves spacing.
const IMAGE_URL_IN_TEXT_RE = /(?:(?:file:\/\/\/?|https?:\/\/)[^\s]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)(?:\?[^\s]*)?|(?<=^|\s)(\/[\w.\/-]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)))/gi;
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== 'user' || !Array.isArray(msg.content)) continue;
|
||||
const newBlocks: AnthropicContentBlock[] = [];
|
||||
let extractedUrls = 0;
|
||||
|
||||
for (const block of msg.content) {
|
||||
if (block.type !== 'text' || !block.text) {
|
||||
newBlocks.push(block);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 检查文本中是否有图片 URL/路径(仅当该消息没有已有的 image block 时)
|
||||
const hasExistingImages = msg.content.some(b => b.type === 'image');
|
||||
if (hasExistingImages) {
|
||||
newBlocks.push(block);
|
||||
continue;
|
||||
}
|
||||
|
||||
const matches = [...block.text.matchAll(IMAGE_URL_IN_TEXT_RE)];
|
||||
if (matches.length === 0) {
|
||||
newBlocks.push(block);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 提取图片 URL 并创建 image block
|
||||
for (const match of matches) {
|
||||
let url = (match[1] || match[0]).trim();
|
||||
// 清理 file:// 前缀
|
||||
if (url.startsWith('file://')) {
|
||||
url = url.replace(/^file:\/\/\/?/, '/');
|
||||
}
|
||||
|
||||
if (url.startsWith('/') || url.startsWith('~')) {
|
||||
// Local file path → emit a URL-typed image block; the file itself is read later in Phase 2 (readFileSync)
|
||||
newBlocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: guessMediaType(url), data: url },
|
||||
} as any);
|
||||
} else if (url.startsWith('http')) {
|
||||
newBlocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: guessMediaType(url), data: url },
|
||||
} as any);
|
||||
}
|
||||
extractedUrls++;
|
||||
}
|
||||
|
||||
// 保留文本块(但移除其中的图片路径引用,避免模型看到路径后产生混乱)
|
||||
let cleanedText = block.text;
|
||||
for (const match of matches) {
|
||||
cleanedText = cleanedText.replace(match[0], '[image]');
|
||||
}
|
||||
if (cleanedText.trim()) {
|
||||
newBlocks.push({ type: 'text', text: cleanedText });
|
||||
}
|
||||
}
|
||||
|
||||
if (extractedUrls > 0) {
|
||||
console.log(`[Converter] 🔍 从文本中提取了 ${extractedUrls} 个图片 URL/路径`);
|
||||
msg.content = newBlocks as AnthropicContentBlock[];
|
||||
}
|
||||
}
|
||||
|
||||
// ★ Phase 2: 统计图片数量 + URL 图片下载转 base64
|
||||
// 支持三种方式:
|
||||
// a) HTTP(S) URL → fetch 下载
|
||||
// b) 本地文件路径 (/, ~, file://) → readFileSync 读取
|
||||
// c) base64 → 直接使用
|
||||
let totalImages = 0;
|
||||
let urlImages = 0;
|
||||
let base64Images = 0;
|
||||
let localImages = 0;
|
||||
for (const msg of messages) {
|
||||
if (!Array.isArray(msg.content)) continue;
|
||||
for (let i = 0; i < msg.content.length; i++) {
|
||||
@@ -1005,35 +1085,70 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
|
||||
totalImages++;
|
||||
// ★ URL 图片处理:远程 URL 需要下载转为 base64(OCR 和 Vision API 均需要)
|
||||
if (block.source?.type === 'url' && block.source.data && !block.source.data.startsWith('data:')) {
|
||||
urlImages++;
|
||||
const imageUrl = block.source.data;
|
||||
console.log(`[Converter] 📥 下载远程图片 (${urlImages}): ${imageUrl.substring(0, 100)}...`);
|
||||
try {
|
||||
const response = await fetch(imageUrl, {
|
||||
...getVisionProxyFetchOptions(),
|
||||
headers: {
|
||||
// 部分图片服务(如 Telegram)需要 User-Agent
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
},
|
||||
} as any);
|
||||
if (!response.ok) throw new Error(`HTTP ${response.status}`);
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
const contentType = response.headers.get('content-type') || 'image/jpeg';
|
||||
const mediaType = contentType.split(';')[0].trim();
|
||||
const base64Data = buffer.toString('base64');
|
||||
// 替换为 base64 格式
|
||||
msg.content[i] = {
|
||||
...block,
|
||||
source: { type: 'base64', media_type: mediaType, data: base64Data },
|
||||
};
|
||||
console.log(`[Converter] ✅ 图片下载成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
|
||||
} catch (err) {
|
||||
console.error(`[Converter] ❌ 远程图片下载失败 (${imageUrl.substring(0, 80)}):`, err);
|
||||
// 下载失败时替换为错误提示文本
|
||||
msg.content[i] = {
|
||||
type: 'text',
|
||||
text: `[Image from URL could not be downloaded: ${(err as Error).message}. URL: ${imageUrl.substring(0, 100)}]`,
|
||||
} as any;
|
||||
|
||||
// ★ 本地文件路径检测:/开头 或 ~/ 开头 或 Windows 绝对路径
|
||||
const isLocalPath = /^(\/|~\/|[A-Za-z]:\\)/.test(imageUrl);
|
||||
|
||||
if (isLocalPath) {
|
||||
localImages++;
|
||||
// 解析本地文件路径
|
||||
const resolvedPath = imageUrl.startsWith('~/')
|
||||
? pathResolve(process.env.HOME || process.env.USERPROFILE || '', imageUrl.slice(2))
|
||||
: pathResolve(imageUrl);
|
||||
|
||||
console.log(`[Converter] 📂 读取本地图片 (${localImages}): ${resolvedPath}`);
|
||||
try {
|
||||
if (!existsSync(resolvedPath)) {
|
||||
throw new Error(`File not found: ${resolvedPath}`);
|
||||
}
|
||||
const fileBuffer = readFileSync(resolvedPath);
|
||||
const mediaType = guessMediaType(resolvedPath);
|
||||
const base64Data = fileBuffer.toString('base64');
|
||||
msg.content[i] = {
|
||||
...block,
|
||||
source: { type: 'base64', media_type: mediaType, data: base64Data },
|
||||
};
|
||||
console.log(`[Converter] ✅ 本地图片读取成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
|
||||
} catch (err) {
|
||||
console.error(`[Converter] ❌ 本地图片读取失败 (${resolvedPath}):`, err);
|
||||
// 本地文件读取失败 → 替换为提示文本
|
||||
msg.content[i] = {
|
||||
type: 'text',
|
||||
text: `[Image from local path could not be read: ${(err as Error).message}. The proxy server may not have access to this file. Path: ${imageUrl.substring(0, 150)}]`,
|
||||
} as any;
|
||||
}
|
||||
} else {
|
||||
// HTTP(S) URL → 网络下载
|
||||
urlImages++;
|
||||
console.log(`[Converter] 📥 下载远程图片 (${urlImages}): ${imageUrl.substring(0, 100)}...`);
|
||||
try {
|
||||
const response = await fetch(imageUrl, {
|
||||
...getVisionProxyFetchOptions(),
|
||||
headers: {
|
||||
// 部分图片服务(如 Telegram)需要 User-Agent
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
},
|
||||
} as any);
|
||||
if (!response.ok) throw new Error(`HTTP ${response.status}`);
|
||||
const buffer = Buffer.from(await response.arrayBuffer());
|
||||
const contentType = response.headers.get('content-type') || 'image/jpeg';
|
||||
const mediaType = contentType.split(';')[0].trim();
|
||||
const base64Data = buffer.toString('base64');
|
||||
// 替换为 base64 格式
|
||||
msg.content[i] = {
|
||||
...block,
|
||||
source: { type: 'base64', media_type: mediaType, data: base64Data },
|
||||
};
|
||||
console.log(`[Converter] ✅ 图片下载成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
|
||||
} catch (err) {
|
||||
console.error(`[Converter] ❌ 远程图片下载失败 (${imageUrl.substring(0, 80)}):`, err);
|
||||
// 下载失败时替换为错误提示文本
|
||||
msg.content[i] = {
|
||||
type: 'text',
|
||||
text: `[Image from URL could not be downloaded: ${(err as Error).message}. URL: ${imageUrl.substring(0, 100)}]`,
|
||||
} as any;
|
||||
}
|
||||
}
|
||||
} else if (block.source?.type === 'base64' && block.source.data) {
|
||||
base64Images++;
|
||||
@@ -1043,7 +1158,7 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
|
||||
}
|
||||
|
||||
if (totalImages === 0) return;
|
||||
console.log(`[Converter] 📊 图片统计: 总计 ${totalImages} 张 (base64: ${base64Images}, URL下载: ${urlImages})`);
|
||||
console.log(`[Converter] 📊 图片统计: 总计 ${totalImages} 张 (base64: ${base64Images}, URL下载: ${urlImages}, 本地文件: ${localImages})`);
|
||||
|
||||
// ★ Phase 3: 调用 vision 拦截器处理(OCR / 外部 API)
|
||||
try {
|
||||
|
||||
@@ -251,7 +251,7 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// HTTP(S) URL — 统一存储到 source.data,由 preprocessImages() 下载
|
||||
// HTTP(S)/local URL — 统一存储到 source.data,由 preprocessImages() 下载/读取
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: 'image/jpeg', data: url }
|
||||
@@ -299,12 +299,61 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
|
||||
source: { type: 'url', media_type: 'image/jpeg', data: url }
|
||||
});
|
||||
}
|
||||
} else if (p.type === 'image_file' && (p as any).image_file) {
|
||||
// ★ Assistants API 格式: { type: 'image_file', image_file: { file_id: '...', detail?: '...' } }
|
||||
// file_id 无法直接使用,但记录下来以便调试
|
||||
const fileId = (p as any).image_file.file_id;
|
||||
console.log(`[OpenAI] ⚠️ 收到 image_file 格式 (file_id: ${fileId}),此格式需要 Files API 支持`);
|
||||
blocks.push({ type: 'text', text: `[Image file reference: file_id=${fileId}. This format requires Files API support which is not available.]` });
|
||||
} else if ((p.type === 'image_url' || p.type === 'input_image') && (p as any).url) {
|
||||
// ★ 扁平 URL 格式:某些客户端将 url 直接放在顶层而非 image_url.url
|
||||
const url = (p as any).url as string;
|
||||
if (url.startsWith('data:')) {
|
||||
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
||||
if (match) {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'base64', media_type: match[1], data: match[2] }
|
||||
});
|
||||
}
|
||||
} else {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: 'image/jpeg', data: url }
|
||||
});
|
||||
}
|
||||
} else if (p.type === 'tool_use') {
|
||||
// Anthropic 风格 tool_use 块直接透传
|
||||
blocks.push(p as unknown as AnthropicContentBlock);
|
||||
} else if (p.type === 'tool_result') {
|
||||
// Anthropic 风格 tool_result 块直接透传
|
||||
blocks.push(p as unknown as AnthropicContentBlock);
|
||||
} else {
|
||||
// ★ 通用兜底:检查未知类型的块是否包含可识别的图片数据
|
||||
const anyP = p as Record<string, unknown>;
|
||||
const possibleUrl = (anyP.url || anyP.file_path || anyP.path ||
|
||||
(anyP.image_url as any)?.url || anyP.data) as string | undefined;
|
||||
if (possibleUrl && typeof possibleUrl === 'string') {
|
||||
const looksLikeImage = /\.(jpg|jpeg|png|gif|webp|bmp|svg)/i.test(possibleUrl) ||
|
||||
possibleUrl.startsWith('data:image/');
|
||||
if (looksLikeImage) {
|
||||
console.log(`[OpenAI] 🔄 未知内容类型 "${p.type}" 中检测到图片引用 → 转为 image block`);
|
||||
if (possibleUrl.startsWith('data:')) {
|
||||
const match = possibleUrl.match(/^data:([^;]+);base64,(.+)$/);
|
||||
if (match) {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'base64', media_type: match[1], data: match[2] }
|
||||
});
|
||||
}
|
||||
} else {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: 'image/jpeg', data: possibleUrl }
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return blocks.length > 0 ? blocks : '';
|
||||
|
||||
Reference in New Issue
Block a user