From 31934ae04c04dd6cda7e32c3308a8e7291da3721 Mon Sep 17 00:00:00 2001 From: MoYeRanQianZhi Date: Thu, 23 Apr 2026 01:15:47 +0800 Subject: [PATCH] feat(codex): enable image generation for all Codex upstream requests Codex CLI gates the built-in image_generation tool behind AuthMode::Chatgpt (OAuth only). When clients connect via API key auth through CPA, the tool is absent from requests, making image generation unavailable through the reverse proxy. Changes: 1. Inject image_generation tool (codex_executor.go): Add ensureImageGenerationTool() that appends {"type":"image_generation","output_format":"png"} to the tools array if not already present. Applied to all three execution paths: Execute, executeCompact, and ExecuteStream. 2. Route aliases for Codex CLI direct access (server.go): Add /backend-api/codex/responses routes that map to the same OpenAI Responses API handlers as /v1/responses. This allows Codex CLI to connect via chatgpt_base_url config while keeping AuthMode::Chatgpt, which enables the built-in image_generation tool on the client side. 3. Unit tests (codex_executor_imagegen_test.go): Cover no-tools, existing tools, already-present, empty array, and mixed built-in tool scenarios. --- internal/api/server.go | 9 ++ internal/runtime/executor/codex_executor.go | 21 +++++ .../executor/codex_executor_imagegen_test.go | 89 +++++++++++++++++++ 3 files changed, 119 insertions(+) create mode 100644 internal/runtime/executor/codex_executor_imagegen_test.go diff --git a/internal/api/server.go b/internal/api/server.go index 7c571e23c..32ae3164f 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -353,6 +353,15 @@ func (s *Server) setupRoutes() { v1.POST("/responses/compact", openaiResponsesHandlers.Compact) } + // Codex CLI direct route aliases (chatgpt_base_url compatible) + codexDirect := s.engine.Group("/backend-api/codex") + codexDirect.Use(AuthMiddleware(s.accessManager)) + { + codexDirect.GET("/responses", openaiResponsesHandlers.ResponsesWebsocket) + codexDirect.POST("/responses", openaiResponsesHandlers.Responses) + codexDirect.POST("/responses/compact", openaiResponsesHandlers.Compact) + } + // Gemini compatible API routes v1beta := s.engine.Group("/v1beta") v1beta.Use(AuthMiddleware(s.accessManager)) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 7d4d3edf8..543e2c277 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -180,6 +180,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body, _ = sjson.DeleteBytes(body, "safety_identifier") body, _ = sjson.DeleteBytes(body, "stream_options") body = normalizeCodexInstructions(body) + body = ensureImageGenerationTool(body) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -326,6 +327,7 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "stream") body = normalizeCodexInstructions(body) + body = ensureImageGenerationTool(body) url := strings.TrimSuffix(baseURL, "/") + "/responses/compact" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -420,6 +422,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body, _ = sjson.DeleteBytes(body, "stream_options") body, _ = sjson.SetBytes(body, "model", baseModel) body = normalizeCodexInstructions(body) + body = ensureImageGenerationTool(body) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -821,6 +824,24 @@ func normalizeCodexInstructions(body []byte) []byte { return body } +var imageGenToolJSON = []byte(`{"type":"image_generation","output_format":"png"}`) +var imageGenToolArrayJSON = []byte(`[{"type":"image_generation","output_format":"png"}]`) + +func ensureImageGenerationTool(body []byte) []byte { + tools := gjson.GetBytes(body, "tools") + if !tools.Exists() || !tools.IsArray() { + body, _ = sjson.SetRawBytes(body, "tools", imageGenToolArrayJSON) + return body + } + for _, t := range tools.Array() { + if t.Get("type").String() == "image_generation" { + return body + } + } + body, _ = sjson.SetRawBytes(body, "tools.-1", imageGenToolJSON) + return body +} + func isCodexModelCapacityError(errorBody []byte) bool { if len(errorBody) == 0 { return false diff --git a/internal/runtime/executor/codex_executor_imagegen_test.go b/internal/runtime/executor/codex_executor_imagegen_test.go new file mode 100644 index 000000000..43f42adee --- /dev/null +++ b/internal/runtime/executor/codex_executor_imagegen_test.go @@ -0,0 +1,89 @@ +package executor + +import ( + "testing" + + "github.com/tidwall/gjson" +) + +func TestEnsureImageGenerationTool_NoTools(t *testing.T) { + body := []byte(`{"model":"gpt-5.4","input":"draw a cat"}`) + result := ensureImageGenerationTool(body) + + tools := gjson.GetBytes(result, "tools") + if !tools.IsArray() { + t.Fatalf("expected tools array, got %v", tools.Type) + } + arr := tools.Array() + if len(arr) != 1 { + t.Fatalf("expected 1 tool, got %d", len(arr)) + } + if arr[0].Get("type").String() != "image_generation" { + t.Fatalf("expected type=image_generation, got %s", arr[0].Get("type").String()) + } + if arr[0].Get("output_format").String() != "png" { + t.Fatalf("expected output_format=png, got %s", arr[0].Get("output_format").String()) + } +} + +func TestEnsureImageGenerationTool_ExistingToolsWithoutImageGen(t *testing.T) { + body := []byte(`{"model":"gpt-5.4","tools":[{"type":"function","name":"get_weather","parameters":{}}]}`) + result := ensureImageGenerationTool(body) + + tools := gjson.GetBytes(result, "tools") + arr := tools.Array() + if len(arr) != 2 { + t.Fatalf("expected 2 tools, got %d", len(arr)) + } + if arr[0].Get("type").String() != "function" { + t.Fatalf("expected first tool type=function, got %s", arr[0].Get("type").String()) + } + if arr[1].Get("type").String() != "image_generation" { + t.Fatalf("expected second tool type=image_generation, got %s", arr[1].Get("type").String()) + } +} + +func TestEnsureImageGenerationTool_AlreadyPresent(t *testing.T) { + body := []byte(`{"model":"gpt-5.4","tools":[{"type":"image_generation","output_format":"webp"},{"type":"function","name":"f1"}]}`) + result := ensureImageGenerationTool(body) + + tools := gjson.GetBytes(result, "tools") + arr := tools.Array() + if len(arr) != 2 { + t.Fatalf("expected 2 tools (no duplicate), got %d", len(arr)) + } + if arr[0].Get("output_format").String() != "webp" { + t.Fatalf("expected original output_format=webp preserved, got %s", arr[0].Get("output_format").String()) + } +} + +func TestEnsureImageGenerationTool_EmptyToolsArray(t *testing.T) { + body := []byte(`{"model":"gpt-5.4","tools":[]}`) + result := ensureImageGenerationTool(body) + + tools := gjson.GetBytes(result, "tools") + arr := tools.Array() + if len(arr) != 1 { + t.Fatalf("expected 1 tool, got %d", len(arr)) + } + if arr[0].Get("type").String() != "image_generation" { + t.Fatalf("expected type=image_generation, got %s", arr[0].Get("type").String()) + } +} + +func TestEnsureImageGenerationTool_WebSearchAndImageGen(t *testing.T) { + body := []byte(`{"model":"gpt-5.4","tools":[{"type":"web_search"}]}`) + result := ensureImageGenerationTool(body) + + tools := gjson.GetBytes(result, "tools") + arr := tools.Array() + if len(arr) != 2 { + t.Fatalf("expected 2 tools, got %d", len(arr)) + } + if arr[0].Get("type").String() != "web_search" { + t.Fatalf("expected first tool type=web_search, got %s", arr[0].Get("type").String()) + } + if arr[1].Get("type").String() != "image_generation" { + t.Fatalf("expected second tool type=image_generation, got %s", arr[1].Get("type").String()) + } +}