fix: 修复 SVG 图片导致 tesseract.js 崩溃的问题 (#69)

SVG 是基于 XML 的矢量图格式,tesseract.js 无法处理。
尝试对 SVG 进行 OCR 会在 tesseract Worker 内部通过 process.nextTick
抛出未捕获的 TypeError (fetch failed),导致整个进程异常终止。

修复方案:在三层防御点过滤 SVG 图片:
1. converter.ts preprocessImages(): 下载/读取图片时,若 content-type
   或文件扩展名表明其为 SVG,则将该图片替换为文本描述
2. vision.ts applyVisionInterceptor(): 在分发到 OCR/Vision API
   之前过滤 SVG 类型的 image block
3. vision.ts processWithLocalOCR(): 在调用 worker.recognize()
   之前检查 UNSUPPORTED_OCR_TYPES 集合(纵深防御)
This commit is contained in:
小海
2026-03-19 09:06:20 +08:00
parent 23c9f16dff
commit 53335aeeab
2 changed files with 39 additions and 1 deletions

View File

@@ -1196,8 +1196,18 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
if (!existsSync(resolvedPath)) {
throw new Error(`File not found: ${resolvedPath}`);
}
const fileBuffer = readFileSync(resolvedPath);
const mediaType = guessMediaType(resolvedPath);
// ★ SVG 是矢量图格式(XML),无法被 OCR 或 Vision API 处理
// tesseract.js 处理 SVG 会抛出 unhandled error 导致进程崩溃
if (mediaType === 'image/svg+xml') {
console.log(`[Converter] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision: ${resolvedPath}`);
msg.content[i] = {
type: 'text',
text: `[SVG vector image attached: ${resolvedPath.substring(resolvedPath.lastIndexOf('/') + 1)}. SVG images are XML-based vector graphics and cannot be processed by OCR/Vision. The image likely contains a logo, icon, badge, or diagram.]`,
} as any;
continue;
}
const fileBuffer = readFileSync(resolvedPath);
const base64Data = fileBuffer.toString('base64');
msg.content[i] = {
...block,
@@ -1228,6 +1238,16 @@ async function preprocessImages(messages: AnthropicMessage[]): Promise<void> {
const buffer = Buffer.from(await response.arrayBuffer());
const contentType = response.headers.get('content-type') || 'image/jpeg';
const mediaType = contentType.split(';')[0].trim();
// ★ SVG 是矢量图格式(XML),无法被 OCR 或 Vision API 处理
// tesseract.js 处理 SVG 会抛出 unhandled error 导致进程崩溃(#69)
if (mediaType === 'image/svg+xml' || imageUrl.toLowerCase().endsWith('.svg')) {
console.log(`[Converter] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision: ${imageUrl.substring(0, 100)}`);
msg.content[i] = {
type: 'text',
text: `[SVG vector image from URL: ${imageUrl}. SVG images are XML-based vector graphics and cannot be processed by OCR/Vision. The image likely contains a logo, icon, badge, or diagram.]`,
} as any;
continue;
}
const base64Data = buffer.toString('base64');
// 替换为 base64 格式
msg.content[i] = {

View File

@@ -26,6 +26,16 @@ export async function applyVisionInterceptor(messages: AnthropicMessage[]): Prom
for (const block of lastUserMsg.content) {
if (block.type === 'image') {
// ★ 跳过 SVG 矢量图 — tesseract.js 无法处理 SVG,会导致进程崩溃 (#69)
const mediaType = (block as any).source?.media_type || '';
if (mediaType === 'image/svg+xml') {
console.log('[Vision] ⚠️ 跳过 SVG 矢量图(不支持 OCR/Vision 处理)');
newContent.push({
type: 'text',
text: '[SVG vector image was attached but cannot be processed by OCR/Vision. It likely contains a logo, icon, badge, or diagram.]',
});
continue;
}
hasImages = true;
imagesToAnalyze.push(block);
} else {
@@ -60,6 +70,9 @@ export async function applyVisionInterceptor(messages: AnthropicMessage[]): Prom
}
}
// Media types that the local OCR path skips instead of recognizing
// (issue #69: SVG input crashes tesseract). Only SVG is listed for now.
const UNSUPPORTED_OCR_TYPES = new Set<string>(['image/svg+xml']);
async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promise<string> {
const worker = await createWorker('eng+chi_sim');
let combinedText = '';
@@ -69,6 +82,11 @@ async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promis
let imageSource: string | Buffer = '';
if (img.type === 'image' && img.source) {
// ★ 防御性检查:跳过不支持 OCR 的格式(#69 - SVG 导致 tesseract 崩溃)
if (UNSUPPORTED_OCR_TYPES.has(img.source.media_type || '')) {
combinedText += `--- Image ${i + 1} ---\n(Skipped: ${img.source.media_type} format is not supported by OCR)\n\n`;
continue;
}
const sourceData = img.source.data || img.source.url;
if (img.source.type === 'base64' && sourceData) {
const mime = img.source.media_type || 'image/jpeg';