fix: 修复 OpenClaw/Telegram 等客户端图片处理失败问题 (#39)

- 新增本地文件路径图片读取支持 (readFileSync)
- 新增文本消息中嵌入图片 URL/路径自动提取
- 新增 file:// URL 解析支持
- OpenAI handler 新增 image_file、扁平 url、通用兜底等格式兼容
- 修复 OpenClaw {{MediaPath}} 本地路径无法被 fetch() 处理的根因
2026-06-04 12:59:41 +08:00 · 2026-03-17 10:56:38 +08:00
parent a0af1c8934
commit e2acdd186d
2 changed files with 194 additions and 30 deletions
--- a/src/converter.ts
+++ b/src/converter.ts
@@ -9,6 +9,9 @@
 * 5. 图片预处理 → Anthropic ImageBlockParam 检测与 OCR/视觉 API 降级
 */

+import { readFileSync, existsSync } from 'fs';
+import { resolve as pathResolve } from 'path';
+
 import { v4 as uuidv4 } from 'uuid';
 import type {
    AnthropicRequest,
@@ -993,10 +996,87 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
        }
    }

+    // ★ Phase 1.5: 文本中嵌入的图片 URL/路径提取
+    // OpenClaw/Telegram 等客户端可能将图片路径/URL 嵌入到文本消息中
+    // 例如: "The user sent an image" + 路径引用，或 {{MediaUrl}} 模板变量
+    // 常见格式：
+    //   - 本地文件路径: /Users/.../file.jpg
+    //   - file:// URL: file:///Users/.../file.jpg
+    //   - HTTP(S) URL 以图片后缀结尾
+    const IMAGE_URL_IN_TEXT_RE = /(?:file:\/\/\/?|(?:https?:\/\/)[^\s]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)(?:\?[^\s]*)?|(?:^|\s)(\/[\w.\/-]+\.(?:jpg|jpeg|png|gif|webp|bmp|svg)))/gi;
+
+    for (const msg of messages) {
+        if (msg.role !== 'user' || !Array.isArray(msg.content)) continue;
+        const newBlocks: AnthropicContentBlock[] = [];
+        let extractedUrls = 0;
+
+        for (const block of msg.content) {
+            if (block.type !== 'text' || !block.text) {
+                newBlocks.push(block);
+                continue;
+            }
+
+            // 检查文本中是否有图片 URL/路径（仅当该消息没有已有的 image block 时）
+            const hasExistingImages = msg.content.some(b => b.type === 'image');
+            if (hasExistingImages) {
+                newBlocks.push(block);
+                continue;
+            }
+
+            const matches = [...block.text.matchAll(IMAGE_URL_IN_TEXT_RE)];
+            if (matches.length === 0) {
+                newBlocks.push(block);
+                continue;
+            }
+
+            // 提取图片 URL 并创建 image block
+            for (const match of matches) {
+                let url = (match[1] || match[0]).trim();
+                // 清理 file:// 前缀
+                if (url.startsWith('file://')) {
+                    url = url.replace(/^file:\/\/\/?/, '/');
+                }
+
+                if (url.startsWith('/') || url.startsWith('~')) {
+                    // 本地文件路径 → 尝试读取
+                    newBlocks.push({
+                        type: 'image',
+                        source: { type: 'url', media_type: guessMediaType(url), data: url },
+                    } as any);
+                } else if (url.startsWith('http')) {
+                    newBlocks.push({
+                        type: 'image',
+                        source: { type: 'url', media_type: guessMediaType(url), data: url },
+                    } as any);
+                }
+                extractedUrls++;
+            }
+
+            // 保留文本块（但移除其中的图片路径引用，避免模型看到路径后产生混乱）
+            let cleanedText = block.text;
+            for (const match of matches) {
+                cleanedText = cleanedText.replace(match[0], '[image]');
+            }
+            if (cleanedText.trim()) {
+                newBlocks.push({ type: 'text', text: cleanedText });
+            }
+        }
+
+        if (extractedUrls > 0) {
+            console.log(`[Converter] 🔍 从文本中提取了 ${extractedUrls} 个图片 URL/路径`);
+            msg.content = newBlocks as AnthropicContentBlock[];
+        }
+    }
+
    // ★ Phase 2: 统计图片数量 + URL 图片下载转 base64
+    //   支持三种方式：
+    //   a) HTTP(S) URL → fetch 下载
+    //   b) 本地文件路径 (/ 开头、~/ 开头、Windows 盘符绝对路径) → readFileSync 读取
+    //   c) base64 → 直接使用
    let totalImages = 0;
    let urlImages = 0;
    let base64Images = 0;
+    let localImages = 0;
    for (const msg of messages) {
        if (!Array.isArray(msg.content)) continue;
        for (let i = 0; i < msg.content.length; i++) {
@@ -1005,35 +1085,70 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
                totalImages++;
                // ★ URL 图片处理：远程 URL 需要下载转为 base64（OCR 和 Vision API 均需要）
                if (block.source?.type === 'url' && block.source.data && !block.source.data.startsWith('data:')) {
-                    urlImages++;
                    const imageUrl = block.source.data;
-                    console.log(`[Converter] 📥 下载远程图片 (${urlImages}): ${imageUrl.substring(0, 100)}...`);
-                    try {
-                        const response = await fetch(imageUrl, {
-                            ...getVisionProxyFetchOptions(),
-                            headers: {
-                                // 部分图片服务（如 Telegram）需要 User-Agent
-                                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
-                            },
-                        } as any);
-                        if (!response.ok) throw new Error(`HTTP ${response.status}`);
-                        const buffer = Buffer.from(await response.arrayBuffer());
-                        const contentType = response.headers.get('content-type') || 'image/jpeg';
-                        const mediaType = contentType.split(';')[0].trim();
-                        const base64Data = buffer.toString('base64');
-                        // 替换为 base64 格式
-                        msg.content[i] = {
-                            ...block,
-                            source: { type: 'base64', media_type: mediaType, data: base64Data },
-                        };
-                        console.log(`[Converter] ✅ 图片下载成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
-                    } catch (err) {
-                        console.error(`[Converter] ❌ 远程图片下载失败 (${imageUrl.substring(0, 80)}):`, err);
-                        // 下载失败时替换为错误提示文本
-                        msg.content[i] = {
-                            type: 'text',
-                            text: `[Image from URL could not be downloaded: ${(err as Error).message}. URL: ${imageUrl.substring(0, 100)}]`,
-                        } as any;
+
+                    // ★ 本地文件路径检测：/开头 或 ~/ 开头 或 Windows 绝对路径
+                    const isLocalPath = /^(\/|~\/|[A-Za-z]:\\)/.test(imageUrl);
+
+                    if (isLocalPath) {
+                        localImages++;
+                        // 解析本地文件路径
+                        const resolvedPath = imageUrl.startsWith('~/')
+                            ? pathResolve(process.env.HOME || process.env.USERPROFILE || '', imageUrl.slice(2))
+                            : pathResolve(imageUrl);
+
+                        console.log(`[Converter] 📂 读取本地图片 (${localImages}): ${resolvedPath}`);
+                        try {
+                            if (!existsSync(resolvedPath)) {
+                                throw new Error(`File not found: ${resolvedPath}`);
+                            }
+                            const fileBuffer = readFileSync(resolvedPath);
+                            const mediaType = guessMediaType(resolvedPath);
+                            const base64Data = fileBuffer.toString('base64');
+                            msg.content[i] = {
+                                ...block,
+                                source: { type: 'base64', media_type: mediaType, data: base64Data },
+                            };
+                            console.log(`[Converter] ✅ 本地图片读取成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
+                        } catch (err) {
+                            console.error(`[Converter] ❌ 本地图片读取失败 (${resolvedPath}):`, err);
+                            // 本地文件读取失败 → 替换为提示文本
+                            msg.content[i] = {
+                                type: 'text',
+                                text: `[Image from local path could not be read: ${(err as Error).message}. The proxy server may not have access to this file. Path: ${imageUrl.substring(0, 150)}]`,
+                            } as any;
+                        }
+                    } else {
+                        // HTTP(S) URL → 网络下载
+                        urlImages++;
+                        console.log(`[Converter] 📥 下载远程图片 (${urlImages}): ${imageUrl.substring(0, 100)}...`);
+                        try {
+                            const response = await fetch(imageUrl, {
+                                ...getVisionProxyFetchOptions(),
+                                headers: {
+                                    // 部分图片服务（如 Telegram）需要 User-Agent
+                                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                                },
+                            } as any);
+                            if (!response.ok) throw new Error(`HTTP ${response.status}`);
+                            const buffer = Buffer.from(await response.arrayBuffer());
+                            const contentType = response.headers.get('content-type') || 'image/jpeg';
+                            const mediaType = contentType.split(';')[0].trim();
+                            const base64Data = buffer.toString('base64');
+                            // 替换为 base64 格式
+                            msg.content[i] = {
+                                ...block,
+                                source: { type: 'base64', media_type: mediaType, data: base64Data },
+                            };
+                            console.log(`[Converter] ✅ 图片下载成功: ${mediaType}, ${Math.round(base64Data.length * 0.75 / 1024)}KB`);
+                        } catch (err) {
+                            console.error(`[Converter] ❌ 远程图片下载失败 (${imageUrl.substring(0, 80)}):`, err);
+                            // 下载失败时替换为错误提示文本
+                            msg.content[i] = {
+                                type: 'text',
+                                text: `[Image from URL could not be downloaded: ${(err as Error).message}. URL: ${imageUrl.substring(0, 100)}]`,
+                            } as any;
+                        }
                    }
                } else if (block.source?.type === 'base64' && block.source.data) {
                    base64Images++;
@@ -1043,7 +1158,7 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
    }

    if (totalImages === 0) return;
-    console.log(`[Converter] 📊 图片统计: 总计 ${totalImages} 张 (base64: ${base64Images}, URL下载: ${urlImages})`);
+    console.log(`[Converter] 📊 图片统计: 总计 ${totalImages} 张 (base64: ${base64Images}, URL下载: ${urlImages}, 本地文件: ${localImages})`);

    // ★ Phase 3: 调用 vision 拦截器处理（OCR / 外部 API）
    try {
--- a/src/openai-handler.ts
+++ b/src/openai-handler.ts
@@ -251,7 +251,7 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
                        });
                    }
                } else {
-                    // HTTP(S) URL — 统一存储到 source.data，由 preprocessImages() 下载
+                    // HTTP(S)/local URL — 统一存储到 source.data，由 preprocessImages() 下载/读取
                    blocks.push({
                        type: 'image',
                        source: { type: 'url', media_type: 'image/jpeg', data: url }
@@ -299,12 +299,61 @@ function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicConte
                        source: { type: 'url', media_type: 'image/jpeg', data: url }
                    });
                }
+            } else if (p.type === 'image_file' && (p as any).image_file) {
+                // ★ Assistants API 格式: { type: 'image_file', image_file: { file_id: '...', detail?: '...' } }
+                // file_id 无法直接使用，但记录下来以便调试
+                const fileId = (p as any).image_file.file_id;
+                console.log(`[OpenAI] ⚠️ 收到 image_file 格式 (file_id: ${fileId})，此格式需要 Files API 支持`);
+                blocks.push({ type: 'text', text: `[Image file reference: file_id=${fileId}. This format requires Files API support which is not available.]` });
+            } else if ((p.type === 'image_url' || p.type === 'input_image') && (p as any).url) {
+                // ★ 扁平 URL 格式：某些客户端将 url 直接放在顶层而非 image_url.url
+                const url = (p as any).url as string;
+                if (url.startsWith('data:')) {
+                    const match = url.match(/^data:([^;]+);base64,(.+)$/);
+                    if (match) {
+                        blocks.push({
+                            type: 'image',
+                            source: { type: 'base64', media_type: match[1], data: match[2] }
+                        });
+                    }
+                } else {
+                    blocks.push({
+                        type: 'image',
+                        source: { type: 'url', media_type: 'image/jpeg', data: url }
+                    });
+                }
            } else if (p.type === 'tool_use') {
                // Anthropic 风格 tool_use 块直接透传
                blocks.push(p as unknown as AnthropicContentBlock);
            } else if (p.type === 'tool_result') {
                // Anthropic 风格 tool_result 块直接透传
                blocks.push(p as unknown as AnthropicContentBlock);
+            } else {
+                // ★ 通用兜底：检查未知类型的块是否包含可识别的图片数据
+                const anyP = p as Record<string, unknown>;
+                const possibleUrl = (anyP.url || anyP.file_path || anyP.path ||
+                    (anyP.image_url as any)?.url || anyP.data) as string | undefined;
+                if (possibleUrl && typeof possibleUrl === 'string') {
+                    const looksLikeImage = /\.(jpg|jpeg|png|gif|webp|bmp|svg)/i.test(possibleUrl) ||
+                        possibleUrl.startsWith('data:image/');
+                    if (looksLikeImage) {
+                        console.log(`[OpenAI] 🔄 未知内容类型 "${p.type}" 中检测到图片引用 → 转为 image block`);
+                        if (possibleUrl.startsWith('data:')) {
+                            const match = possibleUrl.match(/^data:([^;]+);base64,(.+)$/);
+                            if (match) {
+                                blocks.push({
+                                    type: 'image',
+                                    source: { type: 'base64', media_type: match[1], data: match[2] }
+                                });
+                            }
+                        } else {
+                            blocks.push({
+                                type: 'image',
+                                source: { type: 'url', media_type: 'image/jpeg', data: possibleUrl }
+                            });
+                        }
+                    }
+                }
            }
        }
        return blocks.length > 0 ? blocks : '';