From 53335aeeab289575522134eb23e680ad4d8a278f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=B5=B7?= <7836246@qq.com> Date: Thu, 19 Mar 2026 09:06:20 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20SVG=20=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E5=AF=BC=E8=87=B4=20tesseract.js=20=E5=B4=A9=E6=BA=83?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=20(#69)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SVG 是基于 XML 的矢量图格式,tesseract.js 无法处理。 尝试对 SVG 进行 OCR 会在 tesseract Worker 内部通过 process.nextTick 抛出未捕获的 TypeError (fetch failed),导致整个进程异常终止。 修复方案:在三层防御点过滤 SVG 图片: 1. converter.ts preprocessImages(): 下载/读取后检测 content-type 或文件扩展名为 SVG 时,替换为文本描述 2. vision.ts applyVisionInterceptor(): 在分发到 OCR/Vision API 之前过滤 SVG 类型的 image block 3. vision.ts processWithLocalOCR(): 在调用 worker.recognize() 之前检查 UNSUPPORTED_OCR_TYPES 集合(纵深防御) --- src/converter.ts | 22 +++++++++++++++++++++- src/vision.ts | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/converter.ts b/src/converter.ts index 6f9b0a1..d605fd1 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -1196,8 +1196,18 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise { if (!existsSync(resolvedPath)) { throw new Error(`File not found: ${resolvedPath}`); } - const fileBuffer = readFileSync(resolvedPath); const mediaType = guessMediaType(resolvedPath); + // ★ SVG 是矢量图格式(XML),无法被 OCR 或 Vision API 处理 + // tesseract.js 处理 SVG 会抛出 unhandled error 导致进程崩溃 + if (mediaType === 'image/svg+xml') { + console.log(`[Converter] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision): ${resolvedPath}`); + msg.content[i] = { + type: 'text', + text: `[SVG vector image attached: ${resolvedPath.substring(resolvedPath.lastIndexOf('/') + 1)}. SVG images are XML-based vector graphics and cannot be processed by OCR/Vision. The image likely contains a logo, icon, badge, or diagram.]`, + } as any; + continue; + } + const fileBuffer = readFileSync(resolvedPath); const base64Data = fileBuffer.toString('base64'); msg.content[i] = { ...block, @@ -1228,6 +1238,16 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise { const buffer = Buffer.from(await response.arrayBuffer()); const contentType = response.headers.get('content-type') || 'image/jpeg'; const mediaType = contentType.split(';')[0].trim(); + // ★ SVG 是矢量图格式(XML),无法被 OCR 或 Vision API 处理 + // tesseract.js 处理 SVG 会抛出 unhandled error 导致进程崩溃(#69) + if (mediaType === 'image/svg+xml' || imageUrl.toLowerCase().endsWith('.svg')) { + console.log(`[Converter] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision): ${imageUrl.substring(0, 100)}`); + msg.content[i] = { + type: 'text', + text: `[SVG vector image from URL: ${imageUrl}. SVG images are XML-based vector graphics and cannot be processed by OCR/Vision. The image likely contains a logo, icon, badge, or diagram.]`, + } as any; + continue; + } const base64Data = buffer.toString('base64'); // 替换为 base64 格式 msg.content[i] = { diff --git a/src/vision.ts b/src/vision.ts index 1266232..7e82668 100644 --- a/src/vision.ts +++ b/src/vision.ts @@ -26,6 +26,16 @@ export async function applyVisionInterceptor(messages: AnthropicMessage[]): Prom for (const block of lastUserMsg.content) { if (block.type === 'image') { + // ★ 跳过 SVG 矢量图 — tesseract.js 无法处理 SVG,会导致进程崩溃 (#69) + const mediaType = (block as any).source?.media_type || ''; + if (mediaType === 'image/svg+xml') { + console.log('[Vision] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision 处理)'); + newContent.push({ + type: 'text', + text: '[SVG vector image was attached but cannot be processed by OCR/Vision. It likely contains a logo, icon, badge, or diagram.]', + }); + continue; + } hasImages = true; imagesToAnalyze.push(block); } else { @@ -60,6 +70,9 @@ export async function applyVisionInterceptor(messages: AnthropicMessage[]): Prom } } +// ★ 不支持 OCR 的图片格式(矢量图、动画等) +const UNSUPPORTED_OCR_TYPES = new Set(['image/svg+xml']); + async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promise { const worker = await createWorker('eng+chi_sim'); let combinedText = ''; @@ -69,6 +82,11 @@ async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promis let imageSource: string | Buffer = ''; if (img.type === 'image' && img.source) { + // ★ 防御性检查:跳过不支持 OCR 的格式(#69 - SVG 导致 tesseract 崩溃) + if (UNSUPPORTED_OCR_TYPES.has(img.source.media_type || '')) { + combinedText += `--- Image ${i + 1} ---\n(Skipped: ${img.source.media_type} format is not supported by OCR)\n\n`; + continue; + } const sourceData = img.source.data || img.source.url; if (img.source.type === 'base64' && sourceData) { const mime = img.source.media_type || 'image/jpeg';