diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 2a94274..03b9783 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -4,7 +4,8 @@ "Bash(dir e:\\\\CodeAI\\\\github\\\\cursor2api:*)", "Bash(cmd:*)", "WebFetch(domain:wttr.in)", - "mcp__fetch__fetch" + "mcp__fetch__fetch", + "mcp__filesystem__directory_tree" ] } } diff --git a/.gitignore b/.gitignore index 9c301f7..dc7349e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,35 @@ Thumbs.db logs/ *.log +# Environment +# Binaries +*.exe +*.dll +*.so +*.dylib +cursor2api + +# Go +vendor/ +*.test + +# Node +node_modules/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +logs/ +*.log + # Environment .env .env.local @@ -33,3 +62,4 @@ logs/ # Build dist/ build/ +*.traineddata \ No newline at end of file diff --git a/README.md b/README.md index 774006c..bee5926 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ - **Anthropic Messages API 完整兼容** - `/v1/messages` 流式/非流式 - **OpenAI Chat Completions API 兼容** - `/v1/chat/completions` 流式/非流式 + 工具调用 +- **多模态视觉降级处理** - 内置纯本地 CPU OCR 图片文字提取(零配置免 Key),或支持外接第三方免费视觉大模型 API 解释图片。 - **Cursor IDE 场景融合提示词注入** - 不覆盖模型身份,顺应 Cursor 内部角色设定 - **全工具支持** - 无工具白名单限制,支持所有 MCP 工具和自定义扩展 - **多层拒绝拦截** - 自动检测和抑制 Cursor 文档助手的拒绝行为 @@ -48,6 +49,8 @@ npm install 编辑 `config.yaml`: - `cursor_model` - 使用的模型(默认 `anthropic/claude-sonnet-4.6`) - `fingerprint.user_agent` - 浏览器 User-Agent(模拟 Chrome 请求) +- `vision.enabled` - 开启视觉拦截 (`true` 发送图片前进行降级处理)。 +- `vision.mode` - 视觉模式。推荐 `ocr` (全自动零配置文字提取)。如需真视觉理解改为 `api` 并配置 `vision.base_url` 和 `vision.api_key` 后接入 Gemini/OpenRouter 等。 ### 3. 
启动 @@ -138,6 +141,14 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use` ## 更新日志 +### v2.3.0 (2026-03-06) — 多模态视觉拦截与降级支持 + +**👁️ 视觉降级护航** +- ✨ 完美解决免费版 Cursor 接口原生不支持图片(抛出 `I cannot view images` 拒绝错误)的痛点。 +- ✨ **开箱即用的纯本地 OCR (`mode: 'ocr'`)**:零配置、免 API Key,利用本机 CPU 毫秒级提取图片/截图中的报错堆栈或代码文本,并无缝重组成上下文发送给大模型处理。 +- ✨ **兼容第三方的外部视觉 API (`mode: 'api'`)**:支持无缝转接 Google Gemini、OpenRouter 等全网免费开源的高级视觉大模型格式,提供超越 OCR 的页面 UI 理解和色彩分析。 +- ✨ 在 Anthropic 和 OpenAI 两种主流请求协议下,自动精准拦截 Base64 和 URL 格式的图片流组合逻辑。 + ### v2.2.0 (2026-03-05) — 身份保护 + 代码精简 **🛡️ 三层身份保护** diff --git a/config.yaml b/config.yaml index 5ba4c06..0365240 100644 --- a/config.yaml +++ b/config.yaml @@ -15,3 +15,17 @@ cursor_model: "anthropic/claude-sonnet-4.6" # 浏览器指纹配置 fingerprint: user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36" + +# 视觉处理降级配置(可选) +# 如果开启,可以拦截您发给大模型的图片进行降级处理(因为目前免费 Cursor 不支持视觉)。 +vision: + enabled: true + # mode 选项: 'ocr' 或 'api' + # 'ocr': [默认模式] 彻底免 Key,零配置,完全依赖本机的 CPU 识图,提取文本、报错日志、代码段后发给大模型。 + # 'api': 需要配置下方的 base_url 和 api_key,把图发给外部视觉模型(如 Gemini、OpenRouter),能“看到”画面内容和色彩。 + mode: 'ocr' + + # ---------- 以下选项仅在 mode: 'api' 时才生效 ---------- + # base_url: "https://openrouter.ai/api/v1/chat/completions" + # api_key: "sk-or-v1-..." 
+ # model: "meta-llama/llama-3.2-11b-vision-instruct:free" diff --git a/cursor2api_update.zip b/cursor2api_update.zip index fecb9f0..e3a406a 100644 Binary files a/cursor2api_update.zip and b/cursor2api_update.zip differ diff --git a/package-lock.json b/package-lock.json index ed45694..23921dd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dotenv": "^16.5.0", "eventsource-parser": "^3.0.1", "express": "^5.1.0", + "tesseract.js": "^7.0.0", "uuid": "^11.1.0", "yaml": "^2.7.1" }, @@ -582,6 +583,12 @@ "node": ">= 0.6" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmmirror.com/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/body-parser": { "version": "2.2.2", "resolved": "https://registry.npmmirror.com/body-parser/-/body-parser-2.2.2.tgz", @@ -1075,6 +1082,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmmirror.com/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmmirror.com/inherits/-/inherits-2.0.4.tgz", @@ -1096,6 +1109,12 @@ "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", "license": "MIT" }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmmirror.com/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -1166,6 +1185,26 
@@ "node": ">= 0.6" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmmirror.com/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmmirror.com/object-inspect/-/object-inspect-1.13.4.tgz", @@ -1199,6 +1238,15 @@ "wrappy": "1" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmmirror.com/parseurl/-/parseurl-1.3.3.tgz", @@ -1270,6 +1318,12 @@ "node": ">= 0.10" } }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT" + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmmirror.com/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -1434,6 +1488,30 @@ "node": ">= 0.8" } }, + "node_modules/tesseract.js": { + "version": "7.0.0", + "resolved": "https://registry.npmmirror.com/tesseract.js/-/tesseract.js-7.0.0.tgz", + "integrity": 
"sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^7.0.0", + "wasm-feature-detect": "^1.8.0", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "7.0.0", + "resolved": "https://registry.npmmirror.com/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz", + "integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==", + "license": "Apache-2.0" + }, "node_modules/toidentifier": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/toidentifier/-/toidentifier-1.0.1.tgz", @@ -1443,6 +1521,12 @@ "node": ">=0.6" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmmirror.com/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/tsx": { "version": "4.21.0", "resolved": "https://registry.npmmirror.com/tsx/-/tsx-4.21.0.tgz", @@ -1529,6 +1613,28 @@ "node": ">= 0.8" } }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmmirror.com/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmmirror.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": 
"5.0.0", + "resolved": "https://registry.npmmirror.com/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz", @@ -1549,6 +1655,15 @@ "funding": { "url": "https://github.com/sponsors/eemeli" } + }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmmirror.com/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } } } } diff --git a/package.json b/package.json index fc6dc99..713f5d6 100644 --- a/package.json +++ b/package.json @@ -9,11 +9,12 @@ "start": "node dist/index.js" }, "dependencies": { - "express": "^5.1.0", - "uuid": "^11.1.0", "dotenv": "^16.5.0", - "yaml": "^2.7.1", - "eventsource-parser": "^3.0.1" + "eventsource-parser": "^3.0.1", + "express": "^5.1.0", + "tesseract.js": "^7.0.0", + "uuid": "^11.1.0", + "yaml": "^2.7.1" }, "devDependencies": { "@types/express": "^5.0.2", diff --git a/src/config.ts b/src/config.ts index e6d3a08..9388f6a 100644 --- a/src/config.ts +++ b/src/config.ts @@ -29,6 +29,15 @@ export function getConfig(): AppConfig { if (yaml.fingerprint) { if (yaml.fingerprint.user_agent) config.fingerprint.userAgent = yaml.fingerprint.user_agent; } + if (yaml.vision) { + config.vision = { + enabled: yaml.vision.enabled !== false, // default to true if vision section exists in some way + mode: yaml.vision.mode || 'ocr', + baseUrl: yaml.vision.base_url || 'https://api.openai.com/v1/chat/completions', + apiKey: yaml.vision.api_key || '', + model: yaml.vision.model || 'gpt-4o-mini', + }; + } } catch (e) { console.warn('[Config] 读取 config.yaml 失败:', e); } diff --git 
a/src/handler.ts b/src/handler.ts index c87857e..2441c07 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -16,6 +16,7 @@ import type { import { convertToCursorRequest, parseToolCalls, hasToolCalls } from './converter.js'; import { sendCursorRequest, sendCursorRequestFull } from './cursor-client.js'; import { getConfig } from './config.js'; +import { applyVisionInterceptor } from './vision.js'; function msgId(): string { return 'msg_' + uuidv4().replace(/-/g, '').substring(0, 24); @@ -264,6 +265,8 @@ export async function handleMessages(req: Request, res: Response): Promise console.log(`[Handler] 收到请求: model=${body.model}, messages=${body.messages?.length}, stream=${body.stream}, tools=${body.tools?.length ?? 0}`); try { + await applyVisionInterceptor(body.messages); + if (isIdentityProbe(body)) { console.log(`[Handler] 拦截到身份探针,返回模拟响应以规避风控`); if (body.stream) { diff --git a/src/openai-handler.ts b/src/openai-handler.ts index aa50593..8330f3d 100644 --- a/src/openai-handler.ts +++ b/src/openai-handler.ts @@ -24,6 +24,7 @@ import type { import { convertToCursorRequest, parseToolCalls, hasToolCalls } from './converter.js'; import { sendCursorRequest, sendCursorRequestFull } from './cursor-client.js'; import { getConfig } from './config.js'; +import { applyVisionInterceptor } from './vision.js'; function chatId(): string { return 'chatcmpl-' + uuidv4().replace(/-/g, '').substring(0, 24); @@ -60,9 +61,11 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest { case 'assistant': { // 助手消息可能包含 tool_calls const blocks: AnthropicContentBlock[] = []; - const textContent = extractOpenAIContent(msg); - if (textContent) { - blocks.push({ type: 'text', text: textContent }); + const contentBlocks = extractOpenAIContentBlocks(msg); + if (typeof contentBlocks === 'string' && contentBlocks) { + blocks.push({ type: 'text', text: contentBlocks }); + } else if (Array.isArray(contentBlocks)) { + blocks.push(...contentBlocks); } if (msg.tool_calls && 
msg.tool_calls.length > 0) { @@ -84,7 +87,7 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest { messages.push({ role: 'assistant', - content: blocks.length > 0 ? blocks : (textContent || ''), + content: blocks.length > 0 ? blocks : (typeof extractOpenAIContentBlocks(msg) === 'string' ? extractOpenAIContentBlocks(msg) as string : ''), }); break; } @@ -127,20 +130,48 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest { } /** - * 从 OpenAI 消息中提取文本内容 + * 从 OpenAI 消息中提取文本或多模态内容块 */ -function extractOpenAIContent(msg: OpenAIMessage): string { +function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicContentBlock[] { if (msg.content === null || msg.content === undefined) return ''; if (typeof msg.content === 'string') return msg.content; if (Array.isArray(msg.content)) { - return msg.content - .filter(p => p.type === 'text' && p.text) - .map(p => p.text!) - .join('\n'); + const blocks: AnthropicContentBlock[] = []; + for (const p of msg.content) { + if (p.type === 'text' && p.text) { + blocks.push({ type: 'text', text: p.text }); + } else if (p.type === 'image_url' && p.image_url?.url) { + const url = p.image_url.url; + if (url.startsWith('data:')) { + const match = url.match(/^data:([^;]+);base64,(.+)$/); + if (match) { + blocks.push({ + type: 'image', + source: { type: 'base64', media_type: match[1], data: match[2] } + }); + } + } else { + blocks.push({ + type: 'image', + source: { type: 'url', media_type: 'image/jpeg', data: url } + }); + } + } + } + return blocks.length > 0 ? 
blocks : ''; } return String(msg.content); } +/** + * 仅提取纯文本(用于系统提示词和旧行为) + */ +function extractOpenAIContent(msg: OpenAIMessage): string { + const blocks = extractOpenAIContentBlocks(msg); + if (typeof blocks === 'string') return blocks; + return blocks.filter(b => b.type === 'text').map(b => b.text).join('\n'); +} + // ==================== 主处理入口 ==================== export async function handleOpenAIChatCompletions(req: Request, res: Response): Promise { @@ -152,6 +183,9 @@ export async function handleOpenAIChatCompletions(req: Request, res: Response): // Step 1: OpenAI → Anthropic 格式 const anthropicReq = convertToAnthropicRequest(body); + // Step 1.5: 应用视觉拦截器(如果启用,会将 anthropicReq 中的 image 转换为 text) + await applyVisionInterceptor(anthropicReq.messages); + // Step 2: Anthropic → Cursor 格式(复用现有管道) const cursorReq = convertToCursorRequest(anthropicReq); diff --git a/src/types.ts b/src/types.ts index a01fb2e..314a087 100644 --- a/src/types.ts +++ b/src/types.ts @@ -20,6 +20,8 @@ export interface AnthropicMessage { export interface AnthropicContentBlock { type: 'text' | 'tool_use' | 'tool_result' | 'image'; text?: string; + // image fields + source?: { type: string; media_type?: string; data: string }; // tool_use fields id?: string; name?: string; @@ -91,6 +93,13 @@ export interface AppConfig { timeout: number; proxy?: string; cursorModel: string; + vision?: { + enabled: boolean; + mode: 'ocr' | 'api'; + baseUrl: string; + apiKey: string; + model: string; + }; fingerprint: { userAgent: string; }; diff --git a/src/vision.ts b/src/vision.ts new file mode 100644 index 0000000..dd5cf52 --- /dev/null +++ b/src/vision.ts @@ -0,0 +1,133 @@ +import { getConfig } from './config.js'; +import type { AnthropicMessage, AnthropicContentBlock } from './types.js'; +import { createWorker } from 'tesseract.js'; + +export async function applyVisionInterceptor(messages: AnthropicMessage[]): Promise { + const config = getConfig(); + if (!config.vision?.enabled) return; + + for (const msg 
of messages) { + if (!Array.isArray(msg.content)) continue; + + let hasImages = false; + const newContent: AnthropicContentBlock[] = []; + const imagesToAnalyze: AnthropicContentBlock[] = []; + + for (const block of msg.content) { + if (block.type === 'image') { + hasImages = true; + imagesToAnalyze.push(block); + } else { + newContent.push(block); + } + } + + if (hasImages && imagesToAnalyze.length > 0) { + try { + let descriptions = ''; + if (config.vision.mode === 'ocr') { + console.log(`[Vision] 启用纯本地 OCR 模式,正在提取 ${imagesToAnalyze.length} 张图片上的文字... (无需 API Key)`); + descriptions = await processWithLocalOCR(imagesToAnalyze); + } else { + console.log(`[Vision] 启用外部 API 模式,正在分析 ${imagesToAnalyze.length} 张图片...`); + descriptions = await callVisionAPI(imagesToAnalyze); + } + + // Add descriptions as a simulated system text block + newContent.push({ + type: 'text', + text: `\n\n[System: The user attached ${imagesToAnalyze.length} image(s). Visual analysis/OCR extracted the following context:\n${descriptions}]\n\n` + }); + + msg.content = newContent; + } catch (e) { + console.error("[Vision API Error]", e); + newContent.push({ + type: 'text', + text: `\n\n[System: The user attached image(s), but the Vision interceptor failed to process them. 
Error: ${(e as Error).message}]\n\n` + }); + msg.content = newContent; + } + } + } +} + +async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promise { + const worker = await createWorker('eng+chi_sim'); + let combinedText = ''; + + for (let i = 0; i < imageBlocks.length; i++) { + const img = imageBlocks[i]; + let imageSource: string | Buffer = ''; + + if (img.type === 'image' && img.source?.data) { + if (img.source.type === 'base64') { + const mime = img.source.media_type || 'image/jpeg'; + imageSource = `data:${mime};base64,${img.source.data}`; + } else if (img.source.type === 'url') { + imageSource = img.source.data; + } + } + + if (imageSource) { + try { + const { data: { text } } = await worker.recognize(imageSource); + combinedText += `--- Image ${i + 1} OCR Text ---\n${text.trim() || '(No text detected in this image)'}\n\n`; + } catch (err) { + console.error(`[Vision OCR] Failed to parse image ${i + 1}:`, err); + combinedText += `--- Image ${i + 1} ---\n(Failed to parse image with local OCR)\n\n`; + } + } + } + + await worker.terminate(); + return combinedText; +} + +async function callVisionAPI(imageBlocks: AnthropicContentBlock[]): Promise { + const config = getConfig().vision!; + + // Construct an array of OpenAI format message parts + const parts: any[] = [ + { type: 'text', text: 'Please describe the attached images in detail. If they contain code, UI elements, or error messages, explicitly write them out.' 
} + ]; + + for (const img of imageBlocks) { + if (img.type === 'image' && img.source?.data) { + let url = ''; + // If it's a raw base64 string + if (img.source.type === 'base64') { + const mime = img.source.media_type || 'image/jpeg'; + url = `data:${mime};base64,${img.source.data}`; + } else if (img.source.type === 'url') { + // Handle remote URLs natively mapped from OpenAI payloads + url = img.source.data; + } + if (url) { + parts.push({ type: 'image_url', image_url: { url } }); + } + } + } + + const payload = { + model: config.model, + messages: [{ role: 'user', content: parts }], + max_tokens: 1500 + }; + + const res = await fetch(config.baseUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.apiKey}` + }, + body: JSON.stringify(payload) + }); + + if (!res.ok) { + throw new Error(`Vision API returned status ${res.status}: ${await res.text()}`); + } + + const data = await res.json() as any; + return data.choices?.[0]?.message?.content || 'No description returned.'; +}