mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-06-22 06:57:37 +08:00
feat(util): normalize Claude tool_result content and improve Gemini integration
- Added `ConvertClaudeToolResultContent` to standardize Claude tool_result content, preserving JSON structure and splitting out base64-encoded images.
- Updated Gemini and Gemini-CLI translators to use the new utility for generating deterministic function responses and inline image parts.
- Added comprehensive test cases for content types and edge cases, ensuring correct handling of string, JSON, and image blocks.

Closes: #2781
This commit is contained in:
@@ -115,11 +115,21 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
|
||||
if len(toolCallIDs) > 1 {
|
||||
funcName = strings.Join(toolCallIDs[0:len(toolCallIDs)-1], "-")
|
||||
}
|
||||
responseData := contentResult.Get("content").Raw
|
||||
toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content"))
|
||||
part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`)
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.name", util.SanitizeFunctionName(funcName))
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData)
|
||||
if toolResult.ResultIsRaw {
|
||||
part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result))
|
||||
} else {
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result)
|
||||
}
|
||||
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part)
|
||||
for _, img := range toolResult.Images {
|
||||
imagePart := []byte(`{"inlineData":{"mime_type":"","data":""}}`)
|
||||
imagePart, _ = sjson.SetBytes(imagePart, "inlineData.mime_type", img.MimeType)
|
||||
imagePart, _ = sjson.SetBytes(imagePart, "inlineData.data", img.Data)
|
||||
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart)
|
||||
}
|
||||
|
||||
case "image":
|
||||
source := contentResult.Get("source")
|
||||
|
||||
@@ -107,3 +107,80 @@ func TestConvertClaudeRequestToCLI_ConvertsMessageSystemRoleToUserContent(t *tes
|
||||
t.Fatalf("Unexpected first system part: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToCLI_StructuredToolResult(t *testing.T) {
|
||||
inputJSON := []byte(`{
|
||||
"model": "gemini-3-flash-preview",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "json-call-1",
|
||||
"content": [
|
||||
{"type": "text", "text": "alpha"},
|
||||
{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
output := ConvertClaudeRequestToCLI("gemini-3-flash-preview", inputJSON, false)
|
||||
|
||||
fr := gjson.GetBytes(output, "request.contents.1.parts.0.functionResponse")
|
||||
if !fr.Exists() {
|
||||
t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "request.contents").Raw)
|
||||
}
|
||||
// The text block must remain structured JSON, not a double-encoded string blob.
|
||||
if got := fr.Get("response.result.text").String(); got != "alpha" {
|
||||
t.Fatalf("expected structured result text 'alpha', got result=%s", fr.Get("response.result").Raw)
|
||||
}
|
||||
// The image block must be emitted as a separate inlineData part, not embedded in result.
|
||||
img := gjson.GetBytes(output, "request.contents.1.parts.1.inlineData")
|
||||
if got := img.Get("mime_type").String(); got != "image/png" {
|
||||
t.Fatalf("expected image mime type 'image/png', got '%s'", got)
|
||||
}
|
||||
if got := img.Get("data").String(); got != "aGVsbG8=" {
|
||||
t.Fatalf("expected image data 'aGVsbG8=', got '%s'", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToCLI_StringToolResult(t *testing.T) {
|
||||
inputJSON := []byte(`{
|
||||
"model": "gemini-3-flash-preview",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
output := ConvertClaudeRequestToCLI("gemini-3-flash-preview", inputJSON, false)
|
||||
|
||||
fr := gjson.GetBytes(output, "request.contents.1.parts.0.functionResponse")
|
||||
if !fr.Exists() {
|
||||
t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "request.contents").Raw)
|
||||
}
|
||||
// String content must not be double-encoded: result should be exactly "alpha".
|
||||
if got := fr.Get("response.result").String(); got != "alpha" {
|
||||
t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", got, fr.Get("response.result").Raw)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,11 +119,21 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
funcName = toolCallID
|
||||
}
|
||||
funcName = util.SanitizeFunctionName(funcName)
|
||||
responseData := contentResult.Get("content").Raw
|
||||
toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content"))
|
||||
part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`)
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.name", funcName)
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData)
|
||||
if toolResult.ResultIsRaw {
|
||||
part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result))
|
||||
} else {
|
||||
part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result)
|
||||
}
|
||||
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part)
|
||||
for _, img := range toolResult.Images {
|
||||
imagePart := []byte(`{"inline_data":{"mime_type":"","data":""}}`)
|
||||
imagePart, _ = sjson.SetBytes(imagePart, "inline_data.mime_type", img.MimeType)
|
||||
imagePart, _ = sjson.SetBytes(imagePart, "inline_data.data", img.Data)
|
||||
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart)
|
||||
}
|
||||
|
||||
case "image":
|
||||
source := contentResult.Get("source")
|
||||
|
||||
@@ -178,3 +178,80 @@ func TestConvertClaudeRequestToGemini_SkipsEmptyTextParts(t *testing.T) {
|
||||
t.Fatalf("Expected part text 'hello', got '%s'", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToGemini_StructuredToolResult(t *testing.T) {
|
||||
inputJSON := []byte(`{
|
||||
"model": "gemini-3-flash-preview",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "json-call-1",
|
||||
"content": [
|
||||
{"type": "text", "text": "alpha"},
|
||||
{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
output := ConvertClaudeRequestToGemini("gemini-3-flash-preview", inputJSON, false)
|
||||
|
||||
fr := gjson.GetBytes(output, "contents.1.parts.0.functionResponse")
|
||||
if !fr.Exists() {
|
||||
t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "contents").Raw)
|
||||
}
|
||||
// The text block must remain structured JSON, not a double-encoded string blob.
|
||||
if got := fr.Get("response.result.text").String(); got != "alpha" {
|
||||
t.Fatalf("expected structured result text 'alpha', got result=%s", fr.Get("response.result").Raw)
|
||||
}
|
||||
// The image block must be emitted as a separate inline_data part, not embedded in result.
|
||||
img := gjson.GetBytes(output, "contents.1.parts.1.inline_data")
|
||||
if got := img.Get("mime_type").String(); got != "image/png" {
|
||||
t.Fatalf("expected image mime type 'image/png', got '%s'", got)
|
||||
}
|
||||
if got := img.Get("data").String(); got != "aGVsbG8=" {
|
||||
t.Fatalf("expected image data 'aGVsbG8=', got '%s'", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeRequestToGemini_StringToolResult(t *testing.T) {
|
||||
inputJSON := []byte(`{
|
||||
"model": "gemini-3-flash-preview",
|
||||
"messages": [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
output := ConvertClaudeRequestToGemini("gemini-3-flash-preview", inputJSON, false)
|
||||
|
||||
fr := gjson.GetBytes(output, "contents.1.parts.0.functionResponse")
|
||||
if !fr.Exists() {
|
||||
t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "contents").Raw)
|
||||
}
|
||||
// String content must not be double-encoded: result should be exactly "alpha".
|
||||
if got := fr.Get("response.result").String(); got != "alpha" {
|
||||
t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", got, fr.Get("response.result").Raw)
|
||||
}
|
||||
}
|
||||
|
||||
109
internal/util/claude_tool_result.go
Normal file
109
internal/util/claude_tool_result.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// ClaudeToolResultImage is one base64-encoded image pulled out of a Claude
// tool_result content block. It is kept apart from the textual result so that a
// caller can emit it as its own provider-specific inline data part instead of
// carrying the image bytes inside the function response result.
type ClaudeToolResultImage struct {
	// MimeType is the media type taken from the block's source.media_type field.
	MimeType string
	// Data is the base64 payload taken from the block's source.data field.
	Data string
}
|
||||
|
||||
// ClaudeToolResult is the normalized form of a Claude tool_result `content` field,
|
||||
// ready to be written into a Gemini-style functionResponse.
|
||||
type ClaudeToolResult struct {
|
||||
// Result is the value for functionResponse.response.result.
|
||||
Result string
|
||||
// ResultIsRaw reports whether Result holds raw JSON (write with sjson.SetRaw*)
|
||||
// or a plain string (write with sjson.Set*). Writing raw JSON text through
|
||||
// sjson.Set as a string value would double-encode it, so callers must honor
|
||||
// this flag.
|
||||
ResultIsRaw bool
|
||||
// Images holds base64 image blocks separated out of the content.
|
||||
Images []ClaudeToolResultImage
|
||||
}
|
||||
|
||||
// ConvertClaudeToolResultContent normalizes a Claude tool_result `content` field into
|
||||
// a deterministic Gemini functionResponse result plus any extracted images.
|
||||
//
|
||||
// Claude tool_result content may be a plain string, an array of mixed text/image
|
||||
// blocks, a single object, or absent. Some Claude->Gemini translators previously
|
||||
// wrote content.Raw straight through sjson.SetBytes, which double-encoded string
|
||||
// content and flattened structured arrays (including base64 image data) into one
|
||||
// opaque escaped string. This helper mirrors the Antigravity Claude translator,
|
||||
// which already handles structured content correctly:
|
||||
//
|
||||
// - string -> plain string result (no double-encoding)
|
||||
// - single non-image -> raw JSON result (structure preserved)
|
||||
// - multiple non-image -> raw JSON array result
|
||||
// - base64 image block -> separated into Images (emitted as inline data parts)
|
||||
// - object -> raw JSON result, or image -> Images with empty result
|
||||
// - absent/empty -> empty string result
|
||||
//
|
||||
// Unlike Antigravity, image blocks without base64 data are dropped rather than
|
||||
// emitted as empty inline data parts, matching the Gemini image part guards.
|
||||
func ConvertClaudeToolResultContent(content gjson.Result) ClaudeToolResult {
|
||||
switch {
|
||||
case content.Type == gjson.String:
|
||||
return ClaudeToolResult{Result: content.String()}
|
||||
case content.IsArray():
|
||||
var images []ClaudeToolResultImage
|
||||
nonImageCount := 0
|
||||
lastNonImageRaw := ""
|
||||
filtered := []byte(`[]`)
|
||||
content.ForEach(func(_, block gjson.Result) bool {
|
||||
if isClaudeBase64Image(block) {
|
||||
if img, ok := claudeImageFromBlock(block); ok {
|
||||
images = append(images, img)
|
||||
}
|
||||
return true
|
||||
}
|
||||
nonImageCount++
|
||||
lastNonImageRaw = block.Raw
|
||||
filtered, _ = sjson.SetRawBytes(filtered, "-1", []byte(block.Raw))
|
||||
return true
|
||||
})
|
||||
switch {
|
||||
case nonImageCount == 1:
|
||||
return ClaudeToolResult{Result: lastNonImageRaw, ResultIsRaw: true, Images: images}
|
||||
case nonImageCount > 1:
|
||||
return ClaudeToolResult{Result: string(filtered), ResultIsRaw: true, Images: images}
|
||||
default:
|
||||
return ClaudeToolResult{Images: images}
|
||||
}
|
||||
case content.IsObject():
|
||||
if isClaudeBase64Image(content) {
|
||||
if img, ok := claudeImageFromBlock(content); ok {
|
||||
return ClaudeToolResult{Images: []ClaudeToolResultImage{img}}
|
||||
}
|
||||
return ClaudeToolResult{}
|
||||
}
|
||||
return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
|
||||
case content.Raw != "":
|
||||
return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
|
||||
default:
|
||||
return ClaudeToolResult{}
|
||||
}
|
||||
}
|
||||
|
||||
// isClaudeBase64Image reports whether a content block is a base64-encoded image block.
|
||||
func isClaudeBase64Image(block gjson.Result) bool {
|
||||
return block.Get("type").String() == "image" && block.Get("source.type").String() == "base64"
|
||||
}
|
||||
|
||||
// claudeImageFromBlock extracts image data from a base64 image block. It returns false
|
||||
// when the block carries no base64 data, so empty inline data parts are not emitted.
|
||||
func claudeImageFromBlock(block gjson.Result) (ClaudeToolResultImage, bool) {
|
||||
data := block.Get("source.data").String()
|
||||
if data == "" {
|
||||
return ClaudeToolResultImage{}, false
|
||||
}
|
||||
return ClaudeToolResultImage{
|
||||
MimeType: block.Get("source.media_type").String(),
|
||||
Data: data,
|
||||
}, true
|
||||
}
|
||||
110
internal/util/claude_tool_result_test.go
Normal file
110
internal/util/claude_tool_result_test.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestConvertClaudeToolResultContent(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
wrapper string
|
||||
wantResult string
|
||||
wantRaw bool
|
||||
wantImages int
|
||||
}{
|
||||
{
|
||||
name: "StringContent",
|
||||
wrapper: `{"content":"alpha"}`,
|
||||
wantResult: "alpha",
|
||||
wantRaw: false,
|
||||
wantImages: 0,
|
||||
},
|
||||
{
|
||||
name: "SingleTextBlock",
|
||||
wrapper: `{"content":[{"type":"text","text":"alpha"}]}`,
|
||||
wantResult: `{"type":"text","text":"alpha"}`,
|
||||
wantRaw: true,
|
||||
wantImages: 0,
|
||||
},
|
||||
{
|
||||
name: "MultipleTextBlocks",
|
||||
wrapper: `{"content":[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]}`,
|
||||
wantResult: `[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]`,
|
||||
wantRaw: true,
|
||||
wantImages: 0,
|
||||
},
|
||||
{
|
||||
name: "TextAndImage",
|
||||
wrapper: `{"content":[{"type":"text","text":"alpha"},{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
|
||||
wantResult: `{"type":"text","text":"alpha"}`,
|
||||
wantRaw: true,
|
||||
wantImages: 1,
|
||||
},
|
||||
{
|
||||
name: "ImageOnly",
|
||||
wrapper: `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
|
||||
wantResult: "",
|
||||
wantRaw: false,
|
||||
wantImages: 1,
|
||||
},
|
||||
{
|
||||
name: "ImageWithoutDataDropped",
|
||||
wrapper: `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png"}}]}`,
|
||||
wantResult: "",
|
||||
wantRaw: false,
|
||||
wantImages: 0,
|
||||
},
|
||||
{
|
||||
name: "ObjectContent",
|
||||
wrapper: `{"content":{"foo":"bar"}}`,
|
||||
wantResult: `{"foo":"bar"}`,
|
||||
wantRaw: true,
|
||||
wantImages: 0,
|
||||
},
|
||||
{
|
||||
name: "ObjectImage",
|
||||
wrapper: `{"content":{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}}`,
|
||||
wantResult: "",
|
||||
wantRaw: false,
|
||||
wantImages: 1,
|
||||
},
|
||||
{
|
||||
name: "AbsentContent",
|
||||
wrapper: `{}`,
|
||||
wantResult: "",
|
||||
wantRaw: false,
|
||||
wantImages: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ConvertClaudeToolResultContent(gjson.Get(tt.wrapper, "content"))
|
||||
if got.Result != tt.wantResult {
|
||||
t.Errorf("Result = %q, want %q", got.Result, tt.wantResult)
|
||||
}
|
||||
if got.ResultIsRaw != tt.wantRaw {
|
||||
t.Errorf("ResultIsRaw = %v, want %v", got.ResultIsRaw, tt.wantRaw)
|
||||
}
|
||||
if len(got.Images) != tt.wantImages {
|
||||
t.Errorf("len(Images) = %d, want %d", len(got.Images), tt.wantImages)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertClaudeToolResultContent_ImageFields(t *testing.T) {
|
||||
content := gjson.Get(`{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`, "content")
|
||||
got := ConvertClaudeToolResultContent(content)
|
||||
if len(got.Images) != 1 {
|
||||
t.Fatalf("expected 1 image, got %d", len(got.Images))
|
||||
}
|
||||
if got.Images[0].MimeType != "image/png" {
|
||||
t.Errorf("MimeType = %q, want image/png", got.Images[0].MimeType)
|
||||
}
|
||||
if got.Images[0].Data != "aGVsbG8=" {
|
||||
t.Errorf("Data = %q, want aGVsbG8=", got.Images[0].Data)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user