mirror of
https://github.com/7836246/cursor2api.git
synced 2026-06-02 11:59:45 +08:00
feat(vision): add zero-config local OCR and external vision api fallback for image payloads
This commit is contained in:
@@ -4,7 +4,8 @@
|
||||
"Bash(dir e:\\\\CodeAI\\\\github\\\\cursor2api:*)",
|
||||
"Bash(cmd:*)",
|
||||
"WebFetch(domain:wttr.in)",
|
||||
"mcp__fetch__fetch"
|
||||
"mcp__fetch__fetch",
|
||||
"mcp__filesystem__directory_tree"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
30
.gitignore
vendored
30
.gitignore
vendored
@@ -26,6 +26,35 @@ Thumbs.db
|
||||
logs/
|
||||
*.log
|
||||
|
||||
# Environment
|
||||
# Binaries
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
cursor2api
|
||||
|
||||
# Go
|
||||
vendor/
|
||||
*.test
|
||||
|
||||
# Node
|
||||
node_modules/
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
|
||||
# Environment
|
||||
.env
|
||||
.env.local
|
||||
@@ -33,3 +62,4 @@ logs/
|
||||
# Build
|
||||
dist/
|
||||
build/
|
||||
*.traineddata
|
||||
11
README.md
11
README.md
@@ -27,6 +27,7 @@
|
||||
|
||||
- **Anthropic Messages API 完整兼容** - `/v1/messages` 流式/非流式
|
||||
- **OpenAI Chat Completions API 兼容** - `/v1/chat/completions` 流式/非流式 + 工具调用
|
||||
- **多模态视觉降级处理** - 内置纯本地 CPU OCR 图片文字提取(零配置免 Key),或支持外接第三方免费视觉大模型 API 解释图片。
|
||||
- **Cursor IDE 场景融合提示词注入** - 不覆盖模型身份,顺应 Cursor 内部角色设定
|
||||
- **全工具支持** - 无工具白名单限制,支持所有 MCP 工具和自定义扩展
|
||||
- **多层拒绝拦截** - 自动检测和抑制 Cursor 文档助手的拒绝行为
|
||||
@@ -48,6 +49,8 @@ npm install
|
||||
编辑 `config.yaml`:
|
||||
- `cursor_model` - 使用的模型(默认 `anthropic/claude-sonnet-4.6`)
|
||||
- `fingerprint.user_agent` - 浏览器 User-Agent(模拟 Chrome 请求)
|
||||
- `vision.enabled` - 开启视觉拦截 (`true` 发送图片前进行降级处理)。
|
||||
- `vision.mode` - 视觉模式。推荐 `ocr`(全自动零配置文字提取)。如需真正的视觉理解,请改为 `api`,并配置 `vision.base_url`、`vision.api_key` 和 `vision.model`,即可接入 Gemini/OpenRouter 等外部视觉模型。
|
||||
|
||||
### 3. 启动
|
||||
|
||||
@@ -138,6 +141,14 @@ AI 按此格式输出 → 我们解析并转换为标准的 Anthropic `tool_use`
|
||||
|
||||
## 更新日志
|
||||
|
||||
### v2.3.0 (2026-03-06) — 多模态视觉拦截与降级支持
|
||||
|
||||
**👁️ 视觉降级护航**
|
||||
- ✨ 完美解决免费版 Cursor 接口原生不支持图片(抛出 `I cannot view images` 拒绝错误)的痛点。
|
||||
- ✨ **开箱即用的纯本地 OCR (`mode: 'ocr'`)**:零配置、免 API Key,利用本机 CPU 毫秒级提取图片/截图中的报错堆栈或代码文本,并无缝重组成上下文发送给大模型处理。
|
||||
- ✨ **兼容第三方的外部视觉 API (`mode: 'api'`)**:支持无缝转接 Google Gemini、OpenRouter 等全网免费开源的高级视觉大模型格式,提供超越 OCR 的页面 UI 理解和色彩分析。
|
||||
- ✨ 在 Anthropic 和 OpenAI 两种主流请求协议下,自动精准拦截 Base64 和 URL 格式的图片流组合逻辑。
|
||||
|
||||
### v2.2.0 (2026-03-05) — 身份保护 + 代码精简
|
||||
|
||||
**🛡️ 三层身份保护**
|
||||
|
||||
14
config.yaml
14
config.yaml
@@ -15,3 +15,17 @@ cursor_model: "anthropic/claude-sonnet-4.6"
|
||||
# 浏览器指纹配置
|
||||
fingerprint:
|
||||
user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
|
||||
|
||||
# 视觉处理降级配置(可选)
|
||||
# 如果开启,可以拦截您发给大模型的图片进行降级处理(因为目前免费 Cursor 不支持视觉)。
|
||||
vision:
|
||||
enabled: true
|
||||
# mode 选项: 'ocr' 或 'api'
|
||||
# 'ocr': [默认模式] 彻底免 Key,零配置,完全依赖本机的 CPU 识图,提取文本、报错日志、代码段后发给大模型。
|
||||
# 'api': 需要配置下方的 base_url、api_key 和 model,把图发给外部视觉模型(如 Gemini、OpenRouter),能“看到”画面内容和色彩。
|
||||
mode: 'ocr'
|
||||
|
||||
# ---------- 以下选项仅在 mode: 'api' 时才生效 ----------
|
||||
# base_url: "https://openrouter.ai/api/v1/chat/completions"
|
||||
# api_key: "sk-or-v1-..."
|
||||
# model: "meta-llama/llama-3.2-11b-vision-instruct:free"
|
||||
|
||||
Binary file not shown.
115
package-lock.json
generated
115
package-lock.json
generated
@@ -11,6 +11,7 @@
|
||||
"dotenv": "^16.5.0",
|
||||
"eventsource-parser": "^3.0.1",
|
||||
"express": "^5.1.0",
|
||||
"tesseract.js": "^7.0.0",
|
||||
"uuid": "^11.1.0",
|
||||
"yaml": "^2.7.1"
|
||||
},
|
||||
@@ -582,6 +583,12 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/bmp-js": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/bmp-js/-/bmp-js-0.1.0.tgz",
|
||||
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/body-parser": {
|
||||
"version": "2.2.2",
|
||||
"resolved": "https://registry.npmmirror.com/body-parser/-/body-parser-2.2.2.tgz",
|
||||
@@ -1075,6 +1082,12 @@
|
||||
"url": "https://opencollective.com/express"
|
||||
}
|
||||
},
|
||||
"node_modules/idb-keyval": {
|
||||
"version": "6.2.2",
|
||||
"resolved": "https://registry.npmmirror.com/idb-keyval/-/idb-keyval-6.2.2.tgz",
|
||||
"integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/inherits": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmmirror.com/inherits/-/inherits-2.0.4.tgz",
|
||||
@@ -1096,6 +1109,12 @@
|
||||
"integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/is-url": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmmirror.com/is-url/-/is-url-1.2.4.tgz",
|
||||
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
@@ -1166,6 +1185,26 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmmirror.com/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"encoding": "^0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"encoding": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/object-inspect": {
|
||||
"version": "1.13.4",
|
||||
"resolved": "https://registry.npmmirror.com/object-inspect/-/object-inspect-1.13.4.tgz",
|
||||
@@ -1199,6 +1238,15 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/opencollective-postinstall": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
|
||||
"integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==",
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"opencollective-postinstall": "index.js"
|
||||
}
|
||||
},
|
||||
"node_modules/parseurl": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmmirror.com/parseurl/-/parseurl-1.3.3.tgz",
|
||||
@@ -1270,6 +1318,12 @@
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/regenerator-runtime": {
|
||||
"version": "0.13.11",
|
||||
"resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
|
||||
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
@@ -1434,6 +1488,30 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/tesseract.js": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/tesseract.js/-/tesseract.js-7.0.0.tgz",
|
||||
"integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"bmp-js": "^0.1.0",
|
||||
"idb-keyval": "^6.2.0",
|
||||
"is-url": "^1.2.4",
|
||||
"node-fetch": "^2.6.9",
|
||||
"opencollective-postinstall": "^2.0.3",
|
||||
"regenerator-runtime": "^0.13.3",
|
||||
"tesseract.js-core": "^7.0.0",
|
||||
"wasm-feature-detect": "^1.8.0",
|
||||
"zlibjs": "^0.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/tesseract.js-core": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz",
|
||||
"integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/toidentifier": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/toidentifier/-/toidentifier-1.0.1.tgz",
|
||||
@@ -1443,6 +1521,12 @@
|
||||
"node": ">=0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/tr46": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/tr46/-/tr46-0.0.3.tgz",
|
||||
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.21.0",
|
||||
"resolved": "https://registry.npmmirror.com/tsx/-/tsx-4.21.0.tgz",
|
||||
@@ -1529,6 +1613,28 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/wasm-feature-detect": {
|
||||
"version": "1.8.0",
|
||||
"resolved": "https://registry.npmmirror.com/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz",
|
||||
"integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
"node_modules/whatwg-url": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tr46": "~0.0.3",
|
||||
"webidl-conversions": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz",
|
||||
@@ -1549,6 +1655,15 @@
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/eemeli"
|
||||
}
|
||||
},
|
||||
"node_modules/zlibjs": {
|
||||
"version": "0.3.1",
|
||||
"resolved": "https://registry.npmmirror.com/zlibjs/-/zlibjs-0.3.1.tgz",
|
||||
"integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,11 +9,12 @@
|
||||
"start": "node dist/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"express": "^5.1.0",
|
||||
"uuid": "^11.1.0",
|
||||
"dotenv": "^16.5.0",
|
||||
"yaml": "^2.7.1",
|
||||
"eventsource-parser": "^3.0.1"
|
||||
"eventsource-parser": "^3.0.1",
|
||||
"express": "^5.1.0",
|
||||
"tesseract.js": "^7.0.0",
|
||||
"uuid": "^11.1.0",
|
||||
"yaml": "^2.7.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^5.0.2",
|
||||
|
||||
@@ -29,6 +29,15 @@ export function getConfig(): AppConfig {
|
||||
if (yaml.fingerprint) {
|
||||
if (yaml.fingerprint.user_agent) config.fingerprint.userAgent = yaml.fingerprint.user_agent;
|
||||
}
|
||||
if (yaml.vision) {
|
||||
config.vision = {
|
||||
enabled: yaml.vision.enabled !== false, // default to true if vision section exists in some way
|
||||
mode: yaml.vision.mode || 'ocr',
|
||||
baseUrl: yaml.vision.base_url || 'https://api.openai.com/v1/chat/completions',
|
||||
apiKey: yaml.vision.api_key || '',
|
||||
model: yaml.vision.model || 'gpt-4o-mini',
|
||||
};
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('[Config] 读取 config.yaml 失败:', e);
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import type {
|
||||
import { convertToCursorRequest, parseToolCalls, hasToolCalls } from './converter.js';
|
||||
import { sendCursorRequest, sendCursorRequestFull } from './cursor-client.js';
|
||||
import { getConfig } from './config.js';
|
||||
import { applyVisionInterceptor } from './vision.js';
|
||||
|
||||
function msgId(): string {
|
||||
return 'msg_' + uuidv4().replace(/-/g, '').substring(0, 24);
|
||||
@@ -264,6 +265,8 @@ export async function handleMessages(req: Request, res: Response): Promise<void>
|
||||
console.log(`[Handler] 收到请求: model=${body.model}, messages=${body.messages?.length}, stream=${body.stream}, tools=${body.tools?.length ?? 0}`);
|
||||
|
||||
try {
|
||||
await applyVisionInterceptor(body.messages);
|
||||
|
||||
if (isIdentityProbe(body)) {
|
||||
console.log(`[Handler] 拦截到身份探针,返回模拟响应以规避风控`);
|
||||
if (body.stream) {
|
||||
|
||||
@@ -24,6 +24,7 @@ import type {
|
||||
import { convertToCursorRequest, parseToolCalls, hasToolCalls } from './converter.js';
|
||||
import { sendCursorRequest, sendCursorRequestFull } from './cursor-client.js';
|
||||
import { getConfig } from './config.js';
|
||||
import { applyVisionInterceptor } from './vision.js';
|
||||
|
||||
function chatId(): string {
|
||||
return 'chatcmpl-' + uuidv4().replace(/-/g, '').substring(0, 24);
|
||||
@@ -60,9 +61,11 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest {
|
||||
case 'assistant': {
|
||||
// 助手消息可能包含 tool_calls
|
||||
const blocks: AnthropicContentBlock[] = [];
|
||||
const textContent = extractOpenAIContent(msg);
|
||||
if (textContent) {
|
||||
blocks.push({ type: 'text', text: textContent });
|
||||
const contentBlocks = extractOpenAIContentBlocks(msg);
|
||||
if (typeof contentBlocks === 'string' && contentBlocks) {
|
||||
blocks.push({ type: 'text', text: contentBlocks });
|
||||
} else if (Array.isArray(contentBlocks)) {
|
||||
blocks.push(...contentBlocks);
|
||||
}
|
||||
|
||||
if (msg.tool_calls && msg.tool_calls.length > 0) {
|
||||
@@ -84,7 +87,7 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest {
|
||||
|
||||
messages.push({
|
||||
role: 'assistant',
|
||||
content: blocks.length > 0 ? blocks : (textContent || ''),
|
||||
content: blocks.length > 0 ? blocks : (typeof extractOpenAIContentBlocks(msg) === 'string' ? extractOpenAIContentBlocks(msg) as string : ''),
|
||||
});
|
||||
break;
|
||||
}
|
||||
@@ -127,20 +130,48 @@ function convertToAnthropicRequest(body: OpenAIChatRequest): AnthropicRequest {
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 OpenAI 消息中提取文本内容
|
||||
* 从 OpenAI 消息中提取文本或多模态内容块
|
||||
*/
|
||||
function extractOpenAIContent(msg: OpenAIMessage): string {
|
||||
function extractOpenAIContentBlocks(msg: OpenAIMessage): string | AnthropicContentBlock[] {
|
||||
if (msg.content === null || msg.content === undefined) return '';
|
||||
if (typeof msg.content === 'string') return msg.content;
|
||||
if (Array.isArray(msg.content)) {
|
||||
return msg.content
|
||||
.filter(p => p.type === 'text' && p.text)
|
||||
.map(p => p.text!)
|
||||
.join('\n');
|
||||
const blocks: AnthropicContentBlock[] = [];
|
||||
for (const p of msg.content) {
|
||||
if (p.type === 'text' && p.text) {
|
||||
blocks.push({ type: 'text', text: p.text });
|
||||
} else if (p.type === 'image_url' && p.image_url?.url) {
|
||||
const url = p.image_url.url;
|
||||
if (url.startsWith('data:')) {
|
||||
const match = url.match(/^data:([^;]+);base64,(.+)$/);
|
||||
if (match) {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'base64', media_type: match[1], data: match[2] }
|
||||
});
|
||||
}
|
||||
} else {
|
||||
blocks.push({
|
||||
type: 'image',
|
||||
source: { type: 'url', media_type: 'image/jpeg', data: url }
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return blocks.length > 0 ? blocks : '';
|
||||
}
|
||||
return String(msg.content);
|
||||
}
|
||||
|
||||
/**
 * Extracts plain text only (used for system prompts and the legacy behaviour).
 *
 * Delegates to `extractOpenAIContentBlocks`; a string result is returned as-is,
 * while a block array is reduced to the `text` of its `text` blocks joined by newlines.
 */
function extractOpenAIContent(msg: OpenAIMessage): string {
    const extracted = extractOpenAIContentBlocks(msg);

    if (Array.isArray(extracted)) {
        const texts: Array<string | undefined> = [];
        for (const block of extracted) {
            // Non-text blocks (e.g. images) carry no plain text and are skipped.
            if (block.type === 'text') {
                texts.push(block.text);
            }
        }
        return texts.join('\n');
    }

    return extracted;
}
|
||||
|
||||
// ==================== 主处理入口 ====================
|
||||
|
||||
export async function handleOpenAIChatCompletions(req: Request, res: Response): Promise<void> {
|
||||
@@ -152,6 +183,9 @@ export async function handleOpenAIChatCompletions(req: Request, res: Response):
|
||||
// Step 1: OpenAI → Anthropic 格式
|
||||
const anthropicReq = convertToAnthropicRequest(body);
|
||||
|
||||
// Step 1.5: 应用视觉拦截器(如果启用,会将 anthropicReq 中的 image 转换为 text)
|
||||
await applyVisionInterceptor(anthropicReq.messages);
|
||||
|
||||
// Step 2: Anthropic → Cursor 格式(复用现有管道)
|
||||
const cursorReq = convertToCursorRequest(anthropicReq);
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ export interface AnthropicMessage {
|
||||
export interface AnthropicContentBlock {
|
||||
type: 'text' | 'tool_use' | 'tool_result' | 'image';
|
||||
text?: string;
|
||||
// image fields
|
||||
source?: { type: string; media_type?: string; data: string };
|
||||
// tool_use fields
|
||||
id?: string;
|
||||
name?: string;
|
||||
@@ -91,6 +93,13 @@ export interface AppConfig {
|
||||
timeout: number;
|
||||
proxy?: string;
|
||||
cursorModel: string;
|
||||
vision?: {
|
||||
enabled: boolean;
|
||||
mode: 'ocr' | 'api';
|
||||
baseUrl: string;
|
||||
apiKey: string;
|
||||
model: string;
|
||||
};
|
||||
fingerprint: {
|
||||
userAgent: string;
|
||||
};
|
||||
|
||||
133
src/vision.ts
Normal file
133
src/vision.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { getConfig } from './config.js';
|
||||
import type { AnthropicMessage, AnthropicContentBlock } from './types.js';
|
||||
import { createWorker } from 'tesseract.js';
|
||||
|
||||
/**
 * Replaces image blocks in Anthropic-format messages with a text block describing them.
 *
 * Does nothing unless `config.vision.enabled` is truthy. Messages whose `content` is not
 * an array, or that contain no `image` blocks, are left untouched. For every other message
 * all `image` blocks are removed and a single text block is appended after the remaining
 * blocks; it carries either the OCR / vision-API output or, when processing throws, the
 * failure reason.
 *
 * @param messages Messages to rewrite; `msg.content` is reassigned in place.
 */
export async function applyVisionInterceptor(messages: AnthropicMessage[]): Promise<void> {
    const vision = getConfig().vision;
    if (!vision?.enabled) return;

    for (const msg of messages) {
        if (!Array.isArray(msg.content)) continue;

        const imagesToAnalyze: AnthropicContentBlock[] = [];
        const newContent: AnthropicContentBlock[] = [];
        for (const block of msg.content) {
            (block.type === 'image' ? imagesToAnalyze : newContent).push(block);
        }
        if (imagesToAnalyze.length === 0) continue;

        let note: string;
        try {
            let descriptions: string;
            if (vision.mode === 'ocr') {
                console.log(`[Vision] 启用纯本地 OCR 模式,正在提取 ${imagesToAnalyze.length} 张图片上的文字... (无需 API Key)`);
                descriptions = await processWithLocalOCR(imagesToAnalyze);
            } else {
                console.log(`[Vision] 启用外部 API 模式,正在分析 ${imagesToAnalyze.length} 张图片...`);
                descriptions = await callVisionAPI(imagesToAnalyze);
            }

            // Add descriptions as a simulated system text block
            note = `\n\n[System: The user attached ${imagesToAnalyze.length} image(s). Visual analysis/OCR extracted the following context:\n${descriptions}]\n\n`;
        } catch (e: unknown) {
            console.error("[Vision API Error]", e);
            // A thrown value is not guaranteed to be an Error; avoid emitting "undefined".
            const reason = e instanceof Error ? e.message : String(e);
            note = `\n\n[System: The user attached image(s), but the Vision interceptor failed to process them. Error: ${reason}]\n\n`;
        }

        // Images are stripped on both the success and the failure path.
        newContent.push({ type: 'text', text: note });
        msg.content = newContent;
    }
}
|
||||
|
||||
/**
 * Runs local OCR over the given image blocks and returns the combined text.
 *
 * One tesseract.js worker is created with the `eng+chi_sim` language set, reused for
 * every image, and always terminated before returning (also when an unexpected error
 * escapes the loop). Each image contributes one labelled section to the result:
 * the recognised text, a "no text detected" marker, a failure marker when recognition
 * throws, or an "unsupported source" marker when the block has no usable
 * base64 / url source.
 *
 * @param imageBlocks Blocks of type `image` whose `source` is `base64` or `url`.
 * @returns Concatenated per-image sections, numbered from 1 in input order.
 */
async function processWithLocalOCR(imageBlocks: AnthropicContentBlock[]): Promise<string> {
    const worker = await createWorker('eng+chi_sim');
    let combinedText = '';

    try {
        for (let i = 0; i < imageBlocks.length; i++) {
            const img = imageBlocks[i];
            let imageSource = '';

            if (img.type === 'image' && img.source?.data) {
                if (img.source.type === 'base64') {
                    // Rebuild a data URL so the worker receives a self-describing input.
                    const mime = img.source.media_type || 'image/jpeg';
                    imageSource = `data:${mime};base64,${img.source.data}`;
                } else if (img.source.type === 'url') {
                    imageSource = img.source.data;
                }
            }

            if (!imageSource) {
                // Keep the numbering aligned with the attached images instead of silently dropping one.
                combinedText += `--- Image ${i + 1} ---\n(Unsupported or empty image source; skipped)\n\n`;
                continue;
            }

            try {
                const { data: { text } } = await worker.recognize(imageSource);
                combinedText += `--- Image ${i + 1} OCR Text ---\n${text.trim() || '(No text detected in this image)'}\n\n`;
            } catch (err) {
                console.error(`[Vision OCR] Failed to parse image ${i + 1}:`, err);
                combinedText += `--- Image ${i + 1} ---\n(Failed to parse image with local OCR)\n\n`;
            }
        }
    } finally {
        // Release the worker even if something above throws unexpectedly.
        await worker.terminate();
    }

    return combinedText;
}
|
||||
|
||||
/**
 * Sends the given image blocks to the configured external vision endpoint and returns
 * the model's textual description.
 *
 * The request body follows the OpenAI chat-completions shape: one user message whose
 * content is an instruction text part followed by one `image_url` part per usable image
 * (base64 sources are converted to data URLs, url sources are passed through).
 *
 * @param imageBlocks Blocks of type `image` whose `source` is `base64` or `url`.
 * @returns The first choice's message content, or a placeholder string when the
 *          response carries no textual content or no usable image was supplied.
 * @throws Error when the vision section is missing from the config or the endpoint
 *         answers with a non-2xx status (the response body is included in the message).
 */
async function callVisionAPI(imageBlocks: AnthropicContentBlock[]): Promise<string> {
    const config = getConfig().vision;
    if (!config) {
        throw new Error('Vision API mode requires a "vision" section in the configuration.');
    }

    type VisionPart =
        | { type: 'text'; text: string }
        | { type: 'image_url'; image_url: { url: string } };

    // Construct an array of OpenAI format message parts
    const parts: VisionPart[] = [
        { type: 'text', text: 'Please describe the attached images in detail. If they contain code, UI elements, or error messages, explicitly write them out.' }
    ];

    for (const img of imageBlocks) {
        if (img.type !== 'image' || !img.source?.data) continue;

        let url = '';
        if (img.source.type === 'base64') {
            // If it's a raw base64 string
            const mime = img.source.media_type || 'image/jpeg';
            url = `data:${mime};base64,${img.source.data}`;
        } else if (img.source.type === 'url') {
            // Handle remote URLs natively mapped from OpenAI payloads
            url = img.source.data;
        }

        if (url) {
            parts.push({ type: 'image_url', image_url: { url } });
        }
    }

    // Only the instruction part is present: nothing to analyse, so skip the network call.
    if (parts.length === 1) {
        return '(No supported image sources to analyze.)';
    }

    const payload = {
        model: config.model,
        messages: [{ role: 'user', content: parts }],
        max_tokens: 1500
    };

    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    // Do not send an empty bearer token; keyless endpoints may reject a malformed header.
    if (config.apiKey) {
        headers['Authorization'] = `Bearer ${config.apiKey}`;
    }

    const res = await fetch(config.baseUrl, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload)
    });

    if (!res.ok) {
        throw new Error(`Vision API returned status ${res.status}: ${await res.text()}`);
    }

    const data = (await res.json()) as { choices?: Array<{ message?: { content?: unknown } }> } | null;
    const content = data?.choices?.[0]?.message?.content;
    // Only a non-empty string is a usable description; anything else falls back to the placeholder.
    return typeof content === 'string' && content ? content : 'No description returned.';
}
|
||||
Reference in New Issue
Block a user