mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-05-22 20:29:40 +08:00
fix(translator): improve tool response handling for non-string content
- Added `setToolCallOutputContent` to process various content types, including arrays and fallback cases. - Implemented robust handling for specific tool output types like text, image URLs, and files, ensuring proper serialization. - Improved fallback logic to handle unexpected or missing data. Fixed: #2313 Closes: #2349
This commit is contained in:
@@ -121,13 +121,13 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
||||
case "tool":
|
||||
// Handle tool response messages as top-level function_call_output objects
|
||||
toolCallID := m.Get("tool_call_id").String()
|
||||
content := m.Get("content").String()
|
||||
content := m.Get("content")
|
||||
|
||||
// Create function_call_output object
|
||||
funcOutput := []byte(`{}`)
|
||||
funcOutput, _ = sjson.SetBytes(funcOutput, "type", "function_call_output")
|
||||
funcOutput, _ = sjson.SetBytes(funcOutput, "call_id", toolCallID)
|
||||
funcOutput, _ = sjson.SetBytes(funcOutput, "output", content)
|
||||
funcOutput = setToolCallOutputContent(funcOutput, content)
|
||||
out, _ = sjson.SetRawBytes(out, "input.-1", funcOutput)
|
||||
|
||||
default:
|
||||
@@ -359,6 +359,91 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
|
||||
return out
|
||||
}
|
||||
|
||||
func setToolCallOutputContent(funcOutput []byte, content gjson.Result) []byte {
|
||||
switch {
|
||||
case content.Type == gjson.String:
|
||||
funcOutput, _ = sjson.SetBytes(funcOutput, "output", content.String())
|
||||
case content.IsArray():
|
||||
output := []byte(`[]`)
|
||||
for _, item := range content.Array() {
|
||||
output = appendToolOutputContentPart(output, item)
|
||||
}
|
||||
funcOutput, _ = sjson.SetRawBytes(funcOutput, "output", output)
|
||||
default:
|
||||
fallbackOutput := content.Raw
|
||||
if fallbackOutput == "" {
|
||||
fallbackOutput = content.String()
|
||||
}
|
||||
funcOutput, _ = sjson.SetBytes(funcOutput, "output", fallbackOutput)
|
||||
}
|
||||
return funcOutput
|
||||
}
|
||||
|
||||
func appendToolOutputContentPart(output []byte, item gjson.Result) []byte {
|
||||
switch item.Get("type").String() {
|
||||
case "text":
|
||||
part := []byte(`{}`)
|
||||
part, _ = sjson.SetBytes(part, "type", "input_text")
|
||||
part, _ = sjson.SetBytes(part, "text", item.Get("text").String())
|
||||
output, _ = sjson.SetRawBytes(output, "-1", part)
|
||||
case "image_url":
|
||||
imageURL := item.Get("image_url.url").String()
|
||||
fileID := item.Get("image_url.file_id").String()
|
||||
if imageURL == "" && fileID == "" {
|
||||
return appendToolOutputFallbackPart(output, item)
|
||||
}
|
||||
part := []byte(`{}`)
|
||||
part, _ = sjson.SetBytes(part, "type", "input_image")
|
||||
if imageURL != "" {
|
||||
part, _ = sjson.SetBytes(part, "image_url", imageURL)
|
||||
}
|
||||
if fileID != "" {
|
||||
part, _ = sjson.SetBytes(part, "file_id", fileID)
|
||||
}
|
||||
if detail := item.Get("image_url.detail").String(); detail != "" {
|
||||
part, _ = sjson.SetBytes(part, "detail", detail)
|
||||
}
|
||||
output, _ = sjson.SetRawBytes(output, "-1", part)
|
||||
case "file":
|
||||
fileID := item.Get("file.file_id").String()
|
||||
fileData := item.Get("file.file_data").String()
|
||||
fileURL := item.Get("file.file_url").String()
|
||||
if fileID == "" && fileData == "" && fileURL == "" {
|
||||
return appendToolOutputFallbackPart(output, item)
|
||||
}
|
||||
part := []byte(`{}`)
|
||||
part, _ = sjson.SetBytes(part, "type", "input_file")
|
||||
if fileID != "" {
|
||||
part, _ = sjson.SetBytes(part, "file_id", fileID)
|
||||
}
|
||||
if fileData != "" {
|
||||
part, _ = sjson.SetBytes(part, "file_data", fileData)
|
||||
}
|
||||
if fileURL != "" {
|
||||
part, _ = sjson.SetBytes(part, "file_url", fileURL)
|
||||
}
|
||||
if filename := item.Get("file.filename").String(); filename != "" {
|
||||
part, _ = sjson.SetBytes(part, "filename", filename)
|
||||
}
|
||||
output, _ = sjson.SetRawBytes(output, "-1", part)
|
||||
default:
|
||||
output = appendToolOutputFallbackPart(output, item)
|
||||
}
|
||||
return output
|
||||
}
|
||||
|
||||
func appendToolOutputFallbackPart(output []byte, item gjson.Result) []byte {
|
||||
text := item.Raw
|
||||
if text == "" {
|
||||
text = item.String()
|
||||
}
|
||||
part := []byte(`{}`)
|
||||
part, _ = sjson.SetBytes(part, "type", "input_text")
|
||||
part, _ = sjson.SetBytes(part, "text", text)
|
||||
output, _ = sjson.SetRawBytes(output, "-1", part)
|
||||
return output
|
||||
}
|
||||
|
||||
// shortenNameIfNeeded applies the simple shortening rule for a single name.
|
||||
// If the name length exceeds 64, it will try to preserve the "mcp__" prefix and last segment.
|
||||
// Otherwise it truncates to 64 characters.
|
||||
|
||||
@@ -176,6 +176,182 @@ func TestToolCallWithContent(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolCallOutputWithMultimodalContent(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Show me the generated result."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_output_1",
|
||||
"type": "function",
|
||||
"function": {"name": "render_output", "arguments": "{}"}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_output_1",
|
||||
"content": [
|
||||
{"type":"text","text":"Rendered result attached."},
|
||||
{"type":"image_url","image_url":{"url":"https://example.com/generated.png","detail":"high"}},
|
||||
{"type":"image_url","image_url":{"file_id":"file-img-123"}},
|
||||
{"type":"file","file":{"file_id":"file-doc-123","filename":"doc.pdf"}},
|
||||
{"type":"file","file":{"file_data":"SGVsbG8=","filename":"inline.txt"}},
|
||||
{"type":"file","file":{"file_url":"https://example.com/report.pdf","filename":"report.pdf"}}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {"name": "render_output", "description": "Render output", "parameters": {"type": "object", "properties": {}}}
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
|
||||
result := string(out)
|
||||
|
||||
output := gjson.Get(result, "input.2.output")
|
||||
if !output.IsArray() {
|
||||
t.Fatalf("expected tool output to be an array, got: %s", output.Raw)
|
||||
}
|
||||
|
||||
parts := output.Array()
|
||||
if len(parts) != 6 {
|
||||
t.Fatalf("expected 6 output parts, got %d: %s", len(parts), output.Raw)
|
||||
}
|
||||
if parts[0].Get("type").String() != "input_text" || parts[0].Get("text").String() != "Rendered result attached." {
|
||||
t.Fatalf("part 0: expected input_text with rendered text, got %s", parts[0].Raw)
|
||||
}
|
||||
if parts[1].Get("type").String() != "input_image" {
|
||||
t.Fatalf("part 1: expected input_image, got %s", parts[1].Raw)
|
||||
}
|
||||
if parts[1].Get("image_url").String() != "https://example.com/generated.png" {
|
||||
t.Errorf("part 1: unexpected image_url %s", parts[1].Get("image_url").String())
|
||||
}
|
||||
if parts[1].Get("detail").String() != "high" {
|
||||
t.Errorf("part 1: unexpected detail %s", parts[1].Get("detail").String())
|
||||
}
|
||||
if parts[2].Get("type").String() != "input_image" || parts[2].Get("file_id").String() != "file-img-123" {
|
||||
t.Fatalf("part 2: expected file_id-backed input_image, got %s", parts[2].Raw)
|
||||
}
|
||||
if parts[3].Get("type").String() != "input_file" || parts[3].Get("file_id").String() != "file-doc-123" {
|
||||
t.Fatalf("part 3: expected file_id-backed input_file, got %s", parts[3].Raw)
|
||||
}
|
||||
if parts[3].Get("filename").String() != "doc.pdf" {
|
||||
t.Errorf("part 3: unexpected filename %s", parts[3].Get("filename").String())
|
||||
}
|
||||
if parts[4].Get("type").String() != "input_file" || parts[4].Get("file_data").String() != "SGVsbG8=" {
|
||||
t.Fatalf("part 4: expected file_data-backed input_file, got %s", parts[4].Raw)
|
||||
}
|
||||
if parts[5].Get("type").String() != "input_file" || parts[5].Get("file_url").String() != "https://example.com/report.pdf" {
|
||||
t.Fatalf("part 5: expected file_url-backed input_file, got %s", parts[5].Raw)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolCallOutputFallsBackForInvalidStructuredParts(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Check tool output."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
"tool_calls": [
|
||||
{"id": "call_invalid_parts", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_invalid_parts",
|
||||
"content": [
|
||||
{"type":"image_url","image_url":{"detail":"low"}},
|
||||
{"type":"file","file":{"filename":"orphan.txt"}},
|
||||
{"type":"unknown_type","foo":"bar","nested":{"a":1}}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
|
||||
]
|
||||
}`)
|
||||
|
||||
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
|
||||
result := string(out)
|
||||
|
||||
parts := gjson.Get(result, "input.2.output").Array()
|
||||
if len(parts) != 3 {
|
||||
t.Fatalf("expected 3 output parts, got %d: %s", len(parts), gjson.Get(result, "input.2.output").Raw)
|
||||
}
|
||||
|
||||
expectedFallbacks := []string{
|
||||
`{"type":"image_url","image_url":{"detail":"low"}}`,
|
||||
`{"type":"file","file":{"filename":"orphan.txt"}}`,
|
||||
`{"type":"unknown_type","foo":"bar","nested":{"a":1}}`,
|
||||
}
|
||||
for i, expectedFallback := range expectedFallbacks {
|
||||
if parts[i].Get("type").String() != "input_text" {
|
||||
t.Fatalf("part %d: expected input_text fallback, got %s", i, parts[i].Raw)
|
||||
}
|
||||
if parts[i].Get("text").String() != expectedFallback {
|
||||
t.Fatalf("part %d: expected fallback %s, got %s", i, expectedFallback, parts[i].Get("text").String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolCallOutputWithNonStringJSONContent(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
expectedOutput string
|
||||
}{
|
||||
{name: "null", content: `null`, expectedOutput: `null`},
|
||||
{name: "object", content: `{"status":"ok","count":2}`, expectedOutput: `{"status":"ok","count":2}`},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
input := []byte(`{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Check tool output."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
"tool_calls": [
|
||||
{"id": "call_json", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_json",
|
||||
"content": ` + tt.content + `
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
|
||||
]
|
||||
}`)
|
||||
|
||||
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
|
||||
result := string(out)
|
||||
|
||||
output := gjson.Get(result, "input.2.output")
|
||||
if !output.Exists() {
|
||||
t.Fatalf("expected output field to exist: %s", gjson.Get(result, "input.2").Raw)
|
||||
}
|
||||
if output.String() != tt.expectedOutput {
|
||||
t.Fatalf("expected output %s, got %s", tt.expectedOutput, output.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Parallel tool calls: assistant invokes 3 tools at once, all call_ids
|
||||
// and outputs must be translated and paired correctly.
|
||||
func TestMultipleToolCalls(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user