diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go index 6cc701e70..569e06e31 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go @@ -121,13 +121,13 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b case "tool": // Handle tool response messages as top-level function_call_output objects toolCallID := m.Get("tool_call_id").String() - content := m.Get("content").String() + content := m.Get("content") // Create function_call_output object funcOutput := []byte(`{}`) funcOutput, _ = sjson.SetBytes(funcOutput, "type", "function_call_output") funcOutput, _ = sjson.SetBytes(funcOutput, "call_id", toolCallID) - funcOutput, _ = sjson.SetBytes(funcOutput, "output", content) + funcOutput = setToolCallOutputContent(funcOutput, content) out, _ = sjson.SetRawBytes(out, "input.-1", funcOutput) default: @@ -359,6 +359,91 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b return out } +func setToolCallOutputContent(funcOutput []byte, content gjson.Result) []byte { + switch { + case content.Type == gjson.String: + funcOutput, _ = sjson.SetBytes(funcOutput, "output", content.String()) + case content.IsArray(): + output := []byte(`[]`) + for _, item := range content.Array() { + output = appendToolOutputContentPart(output, item) + } + funcOutput, _ = sjson.SetRawBytes(funcOutput, "output", output) + default: + fallbackOutput := content.Raw + if fallbackOutput == "" { + fallbackOutput = content.String() + } + funcOutput, _ = sjson.SetBytes(funcOutput, "output", fallbackOutput) + } + return funcOutput +} + +func appendToolOutputContentPart(output []byte, item gjson.Result) []byte { + switch item.Get("type").String() { + case "text": + part := []byte(`{}`) + part, _ = sjson.SetBytes(part, "type", "input_text") + part, _ = sjson.SetBytes(part, "text", item.Get("text").String()) + output, _ = sjson.SetRawBytes(output, "-1", part) + case "image_url": + imageURL := item.Get("image_url.url").String() + fileID := item.Get("image_url.file_id").String() + if imageURL == "" && fileID == "" { + return appendToolOutputFallbackPart(output, item) + } + part := []byte(`{}`) + part, _ = sjson.SetBytes(part, "type", "input_image") + if imageURL != "" { + part, _ = sjson.SetBytes(part, "image_url", imageURL) + } + if fileID != "" { + part, _ = sjson.SetBytes(part, "file_id", fileID) + } + if detail := item.Get("image_url.detail").String(); detail != "" { + part, _ = sjson.SetBytes(part, "detail", detail) + } + output, _ = sjson.SetRawBytes(output, "-1", part) + case "file": + fileID := item.Get("file.file_id").String() + fileData := item.Get("file.file_data").String() + fileURL := item.Get("file.file_url").String() + if fileID == "" && fileData == "" && fileURL == "" { + return appendToolOutputFallbackPart(output, item) + } + part := []byte(`{}`) + part, _ = sjson.SetBytes(part, "type", "input_file") + if fileID != "" { + part, _ = sjson.SetBytes(part, "file_id", fileID) + } + if fileData != "" { + part, _ = sjson.SetBytes(part, "file_data", fileData) + } + if fileURL != "" { + part, _ = sjson.SetBytes(part, "file_url", fileURL) + } + if filename := item.Get("file.filename").String(); filename != "" { + part, _ = sjson.SetBytes(part, "filename", filename) + } + output, _ = sjson.SetRawBytes(output, "-1", part) + default: + output = appendToolOutputFallbackPart(output, item) + } + return output +} + +func appendToolOutputFallbackPart(output []byte, item gjson.Result) []byte { + text := item.Raw + if text == "" { + text = item.String() + } + part := []byte(`{}`) + part, _ = sjson.SetBytes(part, "type", "input_text") + part, _ = sjson.SetBytes(part, "text", text) + output, _ = sjson.SetRawBytes(output, "-1", part) + return output +} + // shortenNameIfNeeded applies the simple shortening rule for a single name. // If the name length exceeds 64, it will try to preserve the "mcp__" prefix and last segment. // Otherwise it truncates to 64 characters. diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go index 84c8dad2c..e31db6d37 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go @@ -176,6 +176,182 @@ func TestToolCallWithContent(t *testing.T) { } } +func TestToolCallOutputWithMultimodalContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Show me the generated result."}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_output_1", + "type": "function", + "function": {"name": "render_output", "arguments": "{}"} + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_output_1", + "content": [ + {"type":"text","text":"Rendered result attached."}, + {"type":"image_url","image_url":{"url":"https://example.com/generated.png","detail":"high"}}, + {"type":"image_url","image_url":{"file_id":"file-img-123"}}, + {"type":"file","file":{"file_id":"file-doc-123","filename":"doc.pdf"}}, + {"type":"file","file":{"file_data":"SGVsbG8=","filename":"inline.txt"}}, + {"type":"file","file":{"file_url":"https://example.com/report.pdf","filename":"report.pdf"}} + ] + } + ], + "tools": [ + { + "type": "function", + "function": {"name": "render_output", "description": "Render output", "parameters": {"type": "object", "properties": {}}} + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + output := gjson.Get(result, "input.2.output") + if !output.IsArray() { + t.Fatalf("expected tool output to be an array, got: %s", output.Raw) + } + + parts := output.Array() + if len(parts) != 6 { + t.Fatalf("expected 6 output parts, got %d: %s", len(parts), output.Raw) + } + if parts[0].Get("type").String() != "input_text" || parts[0].Get("text").String() != "Rendered result attached." { + t.Fatalf("part 0: expected input_text with rendered text, got %s", parts[0].Raw) + } + if parts[1].Get("type").String() != "input_image" { + t.Fatalf("part 1: expected input_image, got %s", parts[1].Raw) + } + if parts[1].Get("image_url").String() != "https://example.com/generated.png" { + t.Errorf("part 1: unexpected image_url %s", parts[1].Get("image_url").String()) + } + if parts[1].Get("detail").String() != "high" { + t.Errorf("part 1: unexpected detail %s", parts[1].Get("detail").String()) + } + if parts[2].Get("type").String() != "input_image" || parts[2].Get("file_id").String() != "file-img-123" { + t.Fatalf("part 2: expected file_id-backed input_image, got %s", parts[2].Raw) + } + if parts[3].Get("type").String() != "input_file" || parts[3].Get("file_id").String() != "file-doc-123" { + t.Fatalf("part 3: expected file_id-backed input_file, got %s", parts[3].Raw) + } + if parts[3].Get("filename").String() != "doc.pdf" { + t.Errorf("part 3: unexpected filename %s", parts[3].Get("filename").String()) + } + if parts[4].Get("type").String() != "input_file" || parts[4].Get("file_data").String() != "SGVsbG8=" { + t.Fatalf("part 4: expected file_data-backed input_file, got %s", parts[4].Raw) + } + if parts[5].Get("type").String() != "input_file" || parts[5].Get("file_url").String() != "https://example.com/report.pdf" { + t.Fatalf("part 5: expected file_url-backed input_file, got %s", parts[5].Raw) + } +} + +func TestToolCallOutputFallsBackForInvalidStructuredParts(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Check tool output."}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + {"id": "call_invalid_parts", "type": "function", "function": {"name": "inspect", "arguments": "{}"}} + ] + }, + { + "role": "tool", + "tool_call_id": "call_invalid_parts", + "content": [ + {"type":"image_url","image_url":{"detail":"low"}}, + {"type":"file","file":{"filename":"orphan.txt"}}, + {"type":"unknown_type","foo":"bar","nested":{"a":1}} + ] + } + ], + "tools": [ + {"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}} + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + parts := gjson.Get(result, "input.2.output").Array() + if len(parts) != 3 { + t.Fatalf("expected 3 output parts, got %d: %s", len(parts), gjson.Get(result, "input.2.output").Raw) + } + + expectedFallbacks := []string{ + `{"type":"image_url","image_url":{"detail":"low"}}`, + `{"type":"file","file":{"filename":"orphan.txt"}}`, + `{"type":"unknown_type","foo":"bar","nested":{"a":1}}`, + } + for i, expectedFallback := range expectedFallbacks { + if parts[i].Get("type").String() != "input_text" { + t.Fatalf("part %d: expected input_text fallback, got %s", i, parts[i].Raw) + } + if parts[i].Get("text").String() != expectedFallback { + t.Fatalf("part %d: expected fallback %s, got %s", i, expectedFallback, parts[i].Get("text").String()) + } + } +} + +func TestToolCallOutputWithNonStringJSONContent(t *testing.T) { + tests := []struct { + name string + content string + expectedOutput string + }{ + {name: "null", content: `null`, expectedOutput: `null`}, + {name: "object", content: `{"status":"ok","count":2}`, expectedOutput: `{"status":"ok","count":2}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Check tool output."}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + {"id": "call_json", "type": "function", "function": {"name": "inspect", "arguments": "{}"}} + ] + }, + { + "role": "tool", + "tool_call_id": "call_json", + "content": ` + tt.content + ` + } + ], + "tools": [ + {"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}} + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + output := gjson.Get(result, "input.2.output") + if !output.Exists() { + t.Fatalf("expected output field to exist: %s", gjson.Get(result, "input.2").Raw) + } + if output.String() != tt.expectedOutput { + t.Fatalf("expected output %s, got %s", tt.expectedOutput, output.String()) + } + }) + } +} + // Parallel tool calls: assistant invokes 3 tools at once, all call_ids // and outputs must be translated and paired correctly. func TestMultipleToolCalls(t *testing.T) {