From ee8a7135dd853ffa4e0408df2abc4afc16da490b Mon Sep 17 00:00:00 2001
From: majorcheng <major@thu.cn>
Date: Wed, 18 Mar 2026 09:19:37 +0800
Subject: [PATCH] Fix OpenAI stream usage in final chunk

---
 src/openai-handler.ts             |  23 +++--
 src/openai-types.ts               |   6 ++
 test/unit-openai-stream-usage.mjs | 151 ++++++++++++++++++++++++++++++
 3 files changed, 174 insertions(+), 6 deletions(-)
 create mode 100644 test/unit-openai-stream-usage.mjs

diff --git a/src/openai-handler.ts b/src/openai-handler.ts
index f595873..576a33c 100644
--- a/src/openai-handler.ts
+++ b/src/openai-handler.ts
@@ -569,6 +569,19 @@ function writeOpenAITextDelta(
     });
 }
 
+/**
+ * Builds the OpenAI `usage` object: prompt tokens come from `estimateInputTokens(anthropicReq)`,
+ * completion tokens are approximated as `outputText.length / 3`, rounded up.
+ */
+function buildOpenAIUsage(
+    anthropicReq: AnthropicRequest,
+    outputText: string,
+): { prompt_tokens: number; completion_tokens: number; total_tokens: number } {
+    const inputEstimate = estimateInputTokens(anthropicReq);
+    const outputEstimate = Math.ceil(outputText.length / 3);
+    return { prompt_tokens: inputEstimate, completion_tokens: outputEstimate, total_tokens: inputEstimate + outputEstimate };
+}
+
 function writeOpenAIReasoningDelta(
     res: Response,
     id: string,
@@ -723,6 +736,7 @@ async function handleOpenAIIncrementalTextStream(
             delta: {},
             finish_reason: 'stop',
         }],
+        usage: buildOpenAIUsage(anthropicReq, streamer.hasSentText() ? (finalVisibleText || finalRawResponse) : finalTextToSend),
     });
 
     res.write('data: [DONE]\n\n');
@@ -950,7 +964,7 @@ async function handleOpenAIStream(
             }
         }
 
-        // 发送完成 chunk
+        // 发送完成 chunk（带 usage，兼容依赖最终 usage 帧的 OpenAI 客户端/代理）
         writeOpenAISSE(res, {
             id, object: 'chat.completion.chunk', created, model,
             choices: [{
@@ -958,6 +972,7 @@ async function handleOpenAIStream(
                 delta: {},
                 finish_reason: finishReason,
             }],
+            usage: buildOpenAIUsage(anthropicReq, fullResponse),
         });
 
         res.write('data: [DONE]\n\n');
@@ -1097,11 +1112,7 @@ async function handleOpenAINonStream(
             },
             finish_reason: finishReason,
         }],
-        usage: {
-            prompt_tokens: estimateInputTokens(anthropicReq),
-            completion_tokens: Math.ceil(fullText.length / 3),
-            total_tokens: estimateInputTokens(anthropicReq) + Math.ceil(fullText.length / 3),
-        },
+        usage: buildOpenAIUsage(anthropicReq, fullText),
     };
 
     res.json(response);
diff --git a/src/openai-types.ts b/src/openai-types.ts
index 225ae5f..30d77a0 100644
--- a/src/openai-types.ts
+++ b/src/openai-types.ts
@@ -4,6 +4,7 @@ export interface OpenAIChatRequest {
     model: string;
     messages: OpenAIMessage[];
     stream?: boolean;
+    stream_options?: { include_usage?: boolean };
     temperature?: number;
     top_p?: number;
     max_tokens?: number;
@@ -90,6 +91,11 @@ export interface OpenAIChatCompletionChunk {
     created: number;
     model: string;
     choices: OpenAIStreamChoice[];
+    usage?: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
 }
 
 export interface OpenAIStreamChoice {
diff --git a/test/unit-openai-stream-usage.mjs b/test/unit-openai-stream-usage.mjs
new file mode 100644
index 0000000..d7bd51a
--- /dev/null
+++ b/test/unit-openai-stream-usage.mjs
@@ -0,0 +1,151 @@
+/**
+ * test/unit-openai-stream-usage.mjs
+ *
+ * 回归测试：/v1/chat/completions 流式最后一帧应携带 usage
+ * 运行方式：npm run build && node test/unit-openai-stream-usage.mjs
+ */
+
+import { handleOpenAIChatCompletions } from '../dist/openai-handler.js';
+
+let passed = 0; // number of test cases that completed without throwing
+let failed = 0; // number of test cases that threw or rejected
+
+/** Runs `fn`, logs the outcome under `name`, updates the tallies; returns the promise so callers can await it. */
+function test(name, fn) {
+    return Promise.resolve()
+        .then(fn)
+        .then(() => {
+            console.log(`  ✅  ${name}`);
+            passed++;
+        })
+        .catch((e) => {
+            console.error(`  ❌  ${name}\n      ${e?.message ?? String(e)}`); // reason may be null or a non-Error
+            failed++;
+        });
+}
+
+function assert(condition, msg) { // throws an Error when `condition` is falsy
+    if (!condition) throw new Error(msg || 'Assertion failed'); // an empty or missing `msg` falls back to the default text
+}
+
+function assertEqual(a, b, msg) { // `a` is the actual value, `b` the expected one; compared via JSON text
+    const [actualJson, expectedJson] = [a, b].map((value) => JSON.stringify(value));
+    if (actualJson !== expectedJson) throw new Error(msg || `Expected ${expectedJson}, got ${actualJson}`);
+}
+
+/**
+ * Builds a 200 `text/event-stream` Response whose body emits one
+ * `data: {"type":"text-delta","delta":…}` frame per entry of `deltas`, then closes.
+ */
+function createCursorSseResponse(deltas) {
+    const encoder = new TextEncoder();
+    const frames = Array.from(deltas, (delta) => encoder.encode(`data: ${JSON.stringify({ type: 'text-delta', delta })}\n\n`));
+    const body = new ReadableStream({
+        start(controller) {
+            frames.forEach((frame) => controller.enqueue(frame));
+            controller.close();
+        },
+    });
+    const headers = { 'Content-Type': 'text/event-stream' };
+    return new Response(body, { status: 200, headers });
+}
+
+/** Minimal in-memory stand-in for the response object passed to the handler under test. */
+class MockResponse {
+    statusCode = 200;
+    headers = {};
+    body = ''; // everything given to write()/end(), concatenated in call order
+    ended = false;
+    // Records the status code and layers `headers` over the ones recorded so far.
+    writeHead(statusCode, headers) {
+        this.statusCode = statusCode;
+        this.headers = { ...this.headers, ...headers };
+    }
+
+    // Appends the stringified chunk; always returns true.
+    write(chunk) {
+        this.body = `${this.body}${String(chunk)}`;
+        return true;
+    }
+
+    end(chunk = '') { // appends the optional last chunk and flags the response as finished
+        this.write(chunk);
+        this.ended = true;
+    }
+}
+
+/** Splits SSE text on blank lines and JSON-parses every `data: ` frame except the `[DONE]` sentinel. */
+function extractDataChunks(sseText) {
+    const payloads = [];
+    for (const rawFrame of sseText.split('\n\n')) {
+        const frame = rawFrame.trim();
+        if (!frame.startsWith('data: ') || frame === 'data: [DONE]') continue;
+        payloads.push(JSON.parse(frame.slice('data: '.length)));
+    }
+    return payloads;
+}
+
+console.log('\n📦 [1] OpenAI Chat Completions 流式 usage 回归\n');
+// Each test case pushes its promise here so the summary below runs only after every case has finished.
+const pending = [];
+
+pending.push((async () => {
+    const originalFetch = global.fetch; // kept so the stub can be removed in `finally`
+    // Stub global fetch with a canned two-delta SSE response for the duration of this case.
+    try {
+        global.fetch = async () => createCursorSseResponse(['Hello', ' world from Cursor']);
+
+        const req = {
+            method: 'POST',
+            path: '/v1/chat/completions',
+            body: {
+                model: 'gpt-4.1',
+                stream: true,
+                messages: [
+                    { role: 'user', content: 'Write a short greeting in English.' },
+                ],
+            },
+        };
+        const res = new MockResponse();
+
+        await handleOpenAIChatCompletions(req, res);
+        // Basic response checks: recorded status code and that end() was called on the mock.
+        assertEqual(res.statusCode, 200, 'statusCode 应为 200');
+        assert(res.ended, '响应应结束');
+
+        const chunks = extractDataChunks(res.body);
+        assert(chunks.length >= 2, '至少应包含 role chunk 和完成 chunk');
+        // The last JSON frame (extractDataChunks drops the [DONE] sentinel) must carry a consistent usage object.
+        const lastChunk = chunks[chunks.length - 1];
+        assertEqual(lastChunk.object, 'chat.completion.chunk');
+        assert(lastChunk.usage, '最后一帧应包含 usage');
+        assert(typeof lastChunk.usage.prompt_tokens === 'number' && lastChunk.usage.prompt_tokens > 0, 'prompt_tokens 应为正数');
+        assert(typeof lastChunk.usage.completion_tokens === 'number' && lastChunk.usage.completion_tokens > 0, 'completion_tokens 应为正数');
+        assertEqual(
+            lastChunk.usage.total_tokens,
+            lastChunk.usage.prompt_tokens + lastChunk.usage.completion_tokens,
+            'total_tokens 应等于 prompt_tokens + completion_tokens'
+        );
+        assertEqual(lastChunk.choices[0].finish_reason, 'stop', '最后一帧 finish_reason 应为 stop');
+        // At least one chunk must carry a non-empty delta.content.
+        const contentChunks = chunks.filter(chunk => chunk.choices?.[0]?.delta?.content);
+        assert(contentChunks.length > 0, '应输出至少一个 content chunk');
+    } finally {
+        global.fetch = originalFetch; // always undo the stub, even when an assertion above throws
+    }
+})().then(() => {
+    console.log('  ✅  流式最后一帧携带 usage');
+    passed++;
+}).catch((e) => {
+    console.error('  ❌  流式最后一帧携带 usage');
+    console.error(`      ${e.message}`);
+    failed++;
+}));
+
+await Promise.all(pending);
+
+console.log('\n' + '═'.repeat(55));
+console.log(`  结果: ${passed} 通过 / ${failed} 失败 / ${passed + failed} 总计`);
+console.log('═'.repeat(55) + '\n');
+
+if (failed > 0) process.exit(1); // exit non-zero when any case failed