fix(translator): improve tool response handling for non-string content

- Added `setToolCallOutputContent` to process various content types, including arrays and fallback cases.
- Implemented robust handling for specific tool output types like text, image URLs, and files, ensuring proper serialization.
- Improved fallback logic to handle unexpected or missing data.

Fixed: #2313
Closes: #2349
This commit is contained in:
Luis Pater
2026-05-04 05:50:01 +08:00
parent 38dad2afdf
commit 17be6442a8
2 changed files with 263 additions and 2 deletions

View File

@@ -121,13 +121,13 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
case "tool":
// Handle tool response messages as top-level function_call_output objects
toolCallID := m.Get("tool_call_id").String()
content := m.Get("content").String()
content := m.Get("content")
// Create function_call_output object
funcOutput := []byte(`{}`)
funcOutput, _ = sjson.SetBytes(funcOutput, "type", "function_call_output")
funcOutput, _ = sjson.SetBytes(funcOutput, "call_id", toolCallID)
funcOutput, _ = sjson.SetBytes(funcOutput, "output", content)
funcOutput = setToolCallOutputContent(funcOutput, content)
out, _ = sjson.SetRawBytes(out, "input.-1", funcOutput)
default:
@@ -359,6 +359,91 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
return out
}
// setToolCallOutputContent stores a tool message's content under the "output"
// key of funcOutput and returns the resulting JSON.
//
// A JSON string is written as its string value. A JSON array is written as a
// raw JSON array built by passing each element through
// appendToolOutputContentPart. Any other value is written as a string holding
// its raw JSON text, or its String() form when the raw text is empty.
//
// Errors returned by sjson are discarded.
func setToolCallOutputContent(funcOutput []byte, content gjson.Result) []byte {
	if content.Type == gjson.String {
		funcOutput, _ = sjson.SetBytes(funcOutput, "output", content.String())
		return funcOutput
	}

	if content.IsArray() {
		parts := []byte(`[]`)
		elems := content.Array()
		for i := range elems {
			parts = appendToolOutputContentPart(parts, elems[i])
		}
		funcOutput, _ = sjson.SetRawBytes(funcOutput, "output", parts)
		return funcOutput
	}

	// Neither a string nor an array: keep the value's JSON text as a string.
	serialized := content.Raw
	if len(serialized) == 0 {
		serialized = content.String()
	}
	funcOutput, _ = sjson.SetBytes(funcOutput, "output", serialized)
	return funcOutput
}
// appendToolOutputContentPart converts one element of a tool message's content
// array into an output part and appends it to the JSON array held in output.
//
// Mapping by the element's "type":
//   - "text" becomes an "input_text" part carrying the element's "text".
//   - "image_url" becomes an "input_image" part carrying whichever of
//     image_url.url (as "image_url"), image_url.file_id (as "file_id") and
//     image_url.detail (as "detail") are non-empty.
//   - "file" becomes an "input_file" part carrying whichever of file.file_id,
//     file.file_data, file.file_url and file.filename are non-empty.
//
// An image element with neither url nor file_id, a file element with none of
// file_id, file_data or file_url, and an element of any other type are handed
// to appendToolOutputFallbackPart instead. Errors returned by sjson are
// discarded.
func appendToolOutputContentPart(output []byte, item gjson.Result) []byte {
	var part []byte

	// setNonEmpty writes key into part only when value is not the empty string.
	setNonEmpty := func(key, value string) {
		if value != "" {
			part, _ = sjson.SetBytes(part, key, value)
		}
	}

	switch partType := item.Get("type").String(); partType {
	case "text":
		part, _ = sjson.SetBytes([]byte(`{}`), "type", "input_text")
		// The text field is always written, even when it is empty.
		part, _ = sjson.SetBytes(part, "text", item.Get("text").String())

	case "image_url":
		url := item.Get("image_url.url").String()
		id := item.Get("image_url.file_id").String()
		if url == "" && id == "" {
			// No image source at all: pass the element through as text.
			return appendToolOutputFallbackPart(output, item)
		}
		part, _ = sjson.SetBytes([]byte(`{}`), "type", "input_image")
		setNonEmpty("image_url", url)
		setNonEmpty("file_id", id)
		setNonEmpty("detail", item.Get("image_url.detail").String())

	case "file":
		id := item.Get("file.file_id").String()
		data := item.Get("file.file_data").String()
		url := item.Get("file.file_url").String()
		if id == "" && data == "" && url == "" {
			// No file source at all: pass the element through as text.
			return appendToolOutputFallbackPart(output, item)
		}
		part, _ = sjson.SetBytes([]byte(`{}`), "type", "input_file")
		setNonEmpty("file_id", id)
		setNonEmpty("file_data", data)
		setNonEmpty("file_url", url)
		setNonEmpty("filename", item.Get("file.filename").String())

	default:
		return appendToolOutputFallbackPart(output, item)
	}

	output, _ = sjson.SetRawBytes(output, "-1", part)
	return output
}
// appendToolOutputFallbackPart appends an "input_text" part to the JSON array
// in output whose text is the raw JSON of item, or item's String() form when
// the raw JSON is empty. Errors returned by sjson are discarded.
func appendToolOutputFallbackPart(output []byte, item gjson.Result) []byte {
	serialized := item.Raw
	if len(serialized) == 0 {
		serialized = item.String()
	}

	part, _ := sjson.SetBytes([]byte(`{}`), "type", "input_text")
	part, _ = sjson.SetBytes(part, "text", serialized)

	output, _ = sjson.SetRawBytes(output, "-1", part)
	return output
}
// shortenNameIfNeeded applies the simple shortening rule for a single name.
// If the name length exceeds 64, it will try to preserve the "mcp__" prefix and last segment.
// Otherwise it truncates to 64 characters.

View File

@@ -176,6 +176,182 @@ func TestToolCallWithContent(t *testing.T) {
}
}
// TestToolCallOutputWithMultimodalContent converts a request whose tool message
// content is an array of text, image_url and file parts, and checks that each
// part shows up under input.2.output as the matching input_text, input_image or
// input_file part.
func TestToolCallOutputWithMultimodalContent(t *testing.T) {
	input := []byte(`{
"model": "gpt-4o",
"messages": [
{"role": "user", "content": "Show me the generated result."},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "call_output_1",
"type": "function",
"function": {"name": "render_output", "arguments": "{}"}
}
]
},
{
"role": "tool",
"tool_call_id": "call_output_1",
"content": [
{"type":"text","text":"Rendered result attached."},
{"type":"image_url","image_url":{"url":"https://example.com/generated.png","detail":"high"}},
{"type":"image_url","image_url":{"file_id":"file-img-123"}},
{"type":"file","file":{"file_id":"file-doc-123","filename":"doc.pdf"}},
{"type":"file","file":{"file_data":"SGVsbG8=","filename":"inline.txt"}},
{"type":"file","file":{"file_url":"https://example.com/report.pdf","filename":"report.pdf"}}
]
}
],
"tools": [
{
"type": "function",
"function": {"name": "render_output", "description": "Render output", "parameters": {"type": "object", "properties": {}}}
}
]
}`)

	converted := string(ConvertOpenAIRequestToCodex("gpt-4o", input, true))
	output := gjson.Get(converted, "input.2.output")
	if !output.IsArray() {
		t.Fatalf("expected tool output to be an array, got: %s", output.Raw)
	}

	parts := output.Array()
	if count := len(parts); count != 6 {
		t.Fatalf("expected 6 output parts, got %d: %s", count, output.Raw)
	}

	// field reads a string-valued field from the part at index idx.
	field := func(idx int, name string) string {
		return parts[idx].Get(name).String()
	}

	// Part 0: plain text.
	if !(field(0, "type") == "input_text" && field(0, "text") == "Rendered result attached.") {
		t.Fatalf("part 0: expected input_text with rendered text, got %s", parts[0].Raw)
	}

	// Part 1: image referenced by URL, with a detail level.
	if field(1, "type") != "input_image" {
		t.Fatalf("part 1: expected input_image, got %s", parts[1].Raw)
	}
	if got := field(1, "image_url"); got != "https://example.com/generated.png" {
		t.Errorf("part 1: unexpected image_url %s", got)
	}
	if got := field(1, "detail"); got != "high" {
		t.Errorf("part 1: unexpected detail %s", got)
	}

	// Part 2: image referenced by file_id.
	if !(field(2, "type") == "input_image" && field(2, "file_id") == "file-img-123") {
		t.Fatalf("part 2: expected file_id-backed input_image, got %s", parts[2].Raw)
	}

	// Part 3: file referenced by file_id, with a filename.
	if !(field(3, "type") == "input_file" && field(3, "file_id") == "file-doc-123") {
		t.Fatalf("part 3: expected file_id-backed input_file, got %s", parts[3].Raw)
	}
	if got := field(3, "filename"); got != "doc.pdf" {
		t.Errorf("part 3: unexpected filename %s", got)
	}

	// Part 4: file carried inline as file_data.
	if !(field(4, "type") == "input_file" && field(4, "file_data") == "SGVsbG8=") {
		t.Fatalf("part 4: expected file_data-backed input_file, got %s", parts[4].Raw)
	}

	// Part 5: file referenced by URL.
	if !(field(5, "type") == "input_file" && field(5, "file_url") == "https://example.com/report.pdf") {
		t.Fatalf("part 5: expected file_url-backed input_file, got %s", parts[5].Raw)
	}
}
// TestToolCallOutputFallsBackForInvalidStructuredParts converts a tool message
// whose content array holds an image_url part without url or file_id, a file
// part without file_id, file_data or file_url, and a part of an unknown type.
// Each is expected to come out as an input_text part whose text is the
// original part's JSON.
func TestToolCallOutputFallsBackForInvalidStructuredParts(t *testing.T) {
	input := []byte(`{
"model": "gpt-4o",
"messages": [
{"role": "user", "content": "Check tool output."},
{
"role": "assistant",
"content": null,
"tool_calls": [
{"id": "call_invalid_parts", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
]
},
{
"role": "tool",
"tool_call_id": "call_invalid_parts",
"content": [
{"type":"image_url","image_url":{"detail":"low"}},
{"type":"file","file":{"filename":"orphan.txt"}},
{"type":"unknown_type","foo":"bar","nested":{"a":1}}
]
}
],
"tools": [
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
]
}`)

	converted := string(ConvertOpenAIRequestToCodex("gpt-4o", input, true))
	output := gjson.Get(converted, "input.2.output")

	parts := output.Array()
	if len(parts) != 3 {
		t.Fatalf("expected 3 output parts, got %d: %s", len(parts), output.Raw)
	}

	// Expected text of each fallback part, in input order.
	wantTexts := []string{
		`{"type":"image_url","image_url":{"detail":"low"}}`,
		`{"type":"file","file":{"filename":"orphan.txt"}}`,
		`{"type":"unknown_type","foo":"bar","nested":{"a":1}}`,
	}
	for i := 0; i < len(wantTexts); i++ {
		part, want := parts[i], wantTexts[i]
		if kind := part.Get("type").String(); kind != "input_text" {
			t.Fatalf("part %d: expected input_text fallback, got %s", i, part.Raw)
		}
		if got := part.Get("text").String(); got != want {
			t.Fatalf("part %d: expected fallback %s, got %s", i, want, got)
		}
	}
}
// TestToolCallOutputWithNonStringJSONContent runs one subtest per case, placing
// a non-string, non-array JSON value (null or an object) in the tool message's
// content and checking that input.2.output exists and reads back as that
// value's JSON text.
func TestToolCallOutputWithNonStringJSONContent(t *testing.T) {
	cases := []struct {
		label   string
		content string
		want    string
	}{
		{label: "null", content: `null`, want: `null`},
		{label: "object", content: `{"status":"ok","count":2}`, want: `{"status":"ok","count":2}`},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			// The case's content is spliced verbatim into the request JSON.
			input := []byte(`{
"model": "gpt-4o",
"messages": [
{"role": "user", "content": "Check tool output."},
{
"role": "assistant",
"content": null,
"tool_calls": [
{"id": "call_json", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
]
},
{
"role": "tool",
"tool_call_id": "call_json",
"content": ` + tc.content + `
}
],
"tools": [
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
]
}`)

			converted := string(ConvertOpenAIRequestToCodex("gpt-4o", input, true))
			output := gjson.Get(converted, "input.2.output")
			if !output.Exists() {
				t.Fatalf("expected output field to exist: %s", gjson.Get(converted, "input.2").Raw)
			}
			if got := output.String(); got != tc.want {
				t.Fatalf("expected output %s, got %s", tc.want, got)
			}
		})
	}
}
// Parallel tool calls: assistant invokes 3 tools at once, all call_ids
// and outputs must be translated and paired correctly.
func TestMultipleToolCalls(t *testing.T) {