feat(util): normalize Claude tool_result content and improve Gemini integration

- Added `ConvertClaudeToolResultContent` to standardize Claude tool_result content, preserving JSON structure and splitting out base64-encoded images.
- Updated Gemini and Gemini-CLI translators to use the new utility for generating deterministic function responses and inline image parts.
- Added comprehensive test cases for content types and edge cases, ensuring correct handling of string, JSON, and image blocks.

Closes: #2781
This commit is contained in:
Luis Pater
2026-06-16 08:09:30 +08:00
parent 844b855974
commit 2406daf3ef
6 changed files with 397 additions and 4 deletions

View File

@@ -115,11 +115,21 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
if len(toolCallIDs) > 1 {
funcName = strings.Join(toolCallIDs[0:len(toolCallIDs)-1], "-")
}
responseData := contentResult.Get("content").Raw
toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content"))
part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`)
part, _ = sjson.SetBytes(part, "functionResponse.name", util.SanitizeFunctionName(funcName))
part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData)
if toolResult.ResultIsRaw {
part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result))
} else {
part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result)
}
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part)
for _, img := range toolResult.Images {
imagePart := []byte(`{"inlineData":{"mime_type":"","data":""}}`)
imagePart, _ = sjson.SetBytes(imagePart, "inlineData.mime_type", img.MimeType)
imagePart, _ = sjson.SetBytes(imagePart, "inlineData.data", img.Data)
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart)
}
case "image":
source := contentResult.Get("source")

View File

@@ -107,3 +107,80 @@ func TestConvertClaudeRequestToCLI_ConvertsMessageSystemRoleToUserContent(t *tes
t.Fatalf("Unexpected first system part: %q", got)
}
}
// TestConvertClaudeRequestToCLI_StructuredToolResult sends a tool_result whose
// content is an array with one text block and one base64 image block. It checks
// that the text block is reachable as structured JSON under the functionResponse
// result and that the image shows up as the following inlineData part.
func TestConvertClaudeRequestToCLI_StructuredToolResult(t *testing.T) {
	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
]
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "json-call-1",
"content": [
{"type": "text", "text": "alpha"},
{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}}
]
}
]
}
]
}`)

	converted := ConvertClaudeRequestToCLI("gemini-3-flash-preview", request, false)

	functionResponse := gjson.GetBytes(converted, "request.contents.1.parts.0.functionResponse")
	if !functionResponse.Exists() {
		t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(converted, "request.contents").Raw)
	}

	// A double-encoded result would be a JSON string, so the nested "text" key
	// would not resolve.
	text := functionResponse.Get("response.result.text").String()
	if text != "alpha" {
		t.Fatalf("expected structured result text 'alpha', got result=%s", functionResponse.Get("response.result").Raw)
	}

	// The image must live in its own part right after the functionResponse part.
	inline := gjson.GetBytes(converted, "request.contents.1.parts.1.inlineData")
	mimeType := inline.Get("mime_type").String()
	if mimeType != "image/png" {
		t.Fatalf("expected image mime type 'image/png', got '%s'", mimeType)
	}
	payload := inline.Get("data").String()
	if payload != "aGVsbG8=" {
		t.Fatalf("expected image data 'aGVsbG8=', got '%s'", payload)
	}
}
// TestConvertClaudeRequestToCLI_StringToolResult sends a tool_result whose content
// is a plain JSON string and checks that the functionResponse result decodes to
// exactly that string.
func TestConvertClaudeRequestToCLI_StringToolResult(t *testing.T) {
	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
]
},
{
"role": "user",
"content": [
{"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"}
]
}
]
}`)

	converted := ConvertClaudeRequestToCLI("gemini-3-flash-preview", request, false)

	functionResponse := gjson.GetBytes(converted, "request.contents.1.parts.0.functionResponse")
	if !functionResponse.Exists() {
		t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(converted, "request.contents").Raw)
	}

	// If the string had been encoded twice, the decoded value would still carry
	// its own surrounding quotes and this comparison would fail.
	result := functionResponse.Get("response.result")
	if decoded := result.String(); decoded != "alpha" {
		t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", decoded, result.Raw)
	}
}

View File

@@ -119,11 +119,21 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
funcName = toolCallID
}
funcName = util.SanitizeFunctionName(funcName)
responseData := contentResult.Get("content").Raw
toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content"))
part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`)
part, _ = sjson.SetBytes(part, "functionResponse.name", funcName)
part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData)
if toolResult.ResultIsRaw {
part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result))
} else {
part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result)
}
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part)
for _, img := range toolResult.Images {
imagePart := []byte(`{"inline_data":{"mime_type":"","data":""}}`)
imagePart, _ = sjson.SetBytes(imagePart, "inline_data.mime_type", img.MimeType)
imagePart, _ = sjson.SetBytes(imagePart, "inline_data.data", img.Data)
contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart)
}
case "image":
source := contentResult.Get("source")

View File

@@ -178,3 +178,80 @@ func TestConvertClaudeRequestToGemini_SkipsEmptyTextParts(t *testing.T) {
t.Fatalf("Expected part text 'hello', got '%s'", got)
}
}
// TestConvertClaudeRequestToGemini_StructuredToolResult sends a tool_result whose
// content is an array with one text block and one base64 image block. It checks
// that the text block is reachable as structured JSON under the functionResponse
// result and that the image shows up as the following inline_data part.
func TestConvertClaudeRequestToGemini_StructuredToolResult(t *testing.T) {
	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
]
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "json-call-1",
"content": [
{"type": "text", "text": "alpha"},
{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}}
]
}
]
}
]
}`)

	converted := ConvertClaudeRequestToGemini("gemini-3-flash-preview", request, false)

	functionResponse := gjson.GetBytes(converted, "contents.1.parts.0.functionResponse")
	if !functionResponse.Exists() {
		t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(converted, "contents").Raw)
	}

	// A double-encoded result would be a JSON string, so the nested "text" key
	// would not resolve.
	text := functionResponse.Get("response.result.text").String()
	if text != "alpha" {
		t.Fatalf("expected structured result text 'alpha', got result=%s", functionResponse.Get("response.result").Raw)
	}

	// The image must live in its own part right after the functionResponse part.
	inline := gjson.GetBytes(converted, "contents.1.parts.1.inline_data")
	mimeType := inline.Get("mime_type").String()
	if mimeType != "image/png" {
		t.Fatalf("expected image mime type 'image/png', got '%s'", mimeType)
	}
	payload := inline.Get("data").String()
	if payload != "aGVsbG8=" {
		t.Fatalf("expected image data 'aGVsbG8=', got '%s'", payload)
	}
}
// TestConvertClaudeRequestToGemini_StringToolResult sends a tool_result whose
// content is a plain JSON string and checks that the functionResponse result
// decodes to exactly that string.
func TestConvertClaudeRequestToGemini_StringToolResult(t *testing.T) {
	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "assistant",
"content": [
{"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}}
]
},
{
"role": "user",
"content": [
{"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"}
]
}
]
}`)

	converted := ConvertClaudeRequestToGemini("gemini-3-flash-preview", request, false)

	functionResponse := gjson.GetBytes(converted, "contents.1.parts.0.functionResponse")
	if !functionResponse.Exists() {
		t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(converted, "contents").Raw)
	}

	// If the string had been encoded twice, the decoded value would still carry
	// its own surrounding quotes and this comparison would fail.
	result := functionResponse.Get("response.result")
	if decoded := result.String(); decoded != "alpha" {
		t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", decoded, result.Raw)
	}
}

View File

@@ -0,0 +1,109 @@
package util
import (
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// ClaudeToolResultImage is one base64 image lifted out of a Claude tool_result
// content block. Keeping it separate from the textual result lets callers emit
// it as a provider-specific inline data part instead of embedding the image
// bytes in the function response.
type ClaudeToolResultImage struct {
	// MimeType is the image media type, e.g. "image/png".
	MimeType string
	// Data is the base64-encoded image payload.
	Data string
}
// ClaudeToolResult carries a Claude tool_result `content` field after
// normalization, in the shape needed to fill a Gemini-style functionResponse.
type ClaudeToolResult struct {
	// Result is what goes into functionResponse.response.result.
	Result string

	// ResultIsRaw tells the caller how to write Result. When true, Result is
	// already JSON text and must be written with sjson.SetRaw*; when false it is
	// a plain string and must be written with sjson.Set*. Passing JSON text
	// through sjson.Set would encode it a second time as a string, so the flag
	// must be respected.
	ResultIsRaw bool

	// Images lists the base64 image blocks that were pulled out of the content.
	Images []ClaudeToolResultImage
}
// ConvertClaudeToolResultContent turns the `content` field of a Claude tool_result
// block into the value to store in a Gemini functionResponse result, together
// with any base64 images found in it.
//
// The outcome depends on the shape of content:
//
//   - JSON string: the decoded string, with ResultIsRaw false so the caller
//     encodes it exactly once.
//   - array: base64 image blocks are moved to Images (blocks without data are
//     dropped). Of the remaining blocks, exactly one yields that block's raw
//     JSON, several yield a raw JSON array of them in their original order, and
//     none yields an empty result.
//   - object: a base64 image block becomes the single entry of Images, or
//     nothing at all when it carries no data; any other object is passed
//     through as raw JSON.
//   - any other value whose raw text is non-empty (for example a number or
//     boolean): passed through as raw JSON.
//   - absent: the zero ClaudeToolResult.
func ConvertClaudeToolResultContent(content gjson.Result) ClaudeToolResult {
	if content.Type == gjson.String {
		return ClaudeToolResult{Result: content.String()}
	}

	if content.IsArray() {
		var (
			extracted []ClaudeToolResultImage
			kept      []string
		)
		content.ForEach(func(_, item gjson.Result) bool {
			if !isClaudeBase64Image(item) {
				kept = append(kept, item.Raw)
				return true
			}
			// Image blocks never reach the textual result, even when they turn
			// out to be empty and are discarded.
			if image, found := claudeImageFromBlock(item); found {
				extracted = append(extracted, image)
			}
			return true
		})

		normalized := ClaudeToolResult{Images: extracted}
		switch len(kept) {
		case 0:
			// Only images (or an empty array): the result stays empty.
		case 1:
			// A lone block is passed through unwrapped rather than as a
			// one-element array.
			normalized.Result, normalized.ResultIsRaw = kept[0], true
		default:
			joined := []byte(`[]`)
			for _, raw := range kept {
				joined, _ = sjson.SetRawBytes(joined, "-1", []byte(raw))
			}
			normalized.Result, normalized.ResultIsRaw = string(joined), true
		}
		return normalized
	}

	if content.IsObject() {
		if !isClaudeBase64Image(content) {
			return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
		}
		image, found := claudeImageFromBlock(content)
		if !found {
			return ClaudeToolResult{}
		}
		return ClaudeToolResult{Images: []ClaudeToolResultImage{image}}
	}

	if content.Raw == "" {
		return ClaudeToolResult{}
	}
	return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
}
// isClaudeBase64Image reports whether block has type "image" and a source of
// type "base64".
func isClaudeBase64Image(block gjson.Result) bool {
	if block.Get("type").String() != "image" {
		return false
	}
	return block.Get("source.type").String() == "base64"
}
// claudeImageFromBlock reads source.data and source.media_type from an image
// block. The boolean is false, and the returned image is the zero value, when
// source.data is empty, so that callers do not emit empty inline data parts.
func claudeImageFromBlock(block gjson.Result) (ClaudeToolResultImage, bool) {
	var image ClaudeToolResultImage
	if image.Data = block.Get("source.data").String(); image.Data == "" {
		return image, false
	}
	image.MimeType = block.Get("source.media_type").String()
	return image, true
}

View File

@@ -0,0 +1,110 @@
package util
import (
"testing"
"github.com/tidwall/gjson"
)
// TestConvertClaudeToolResultContent runs ConvertClaudeToolResultContent over each
// supported content shape and compares the result text, the raw flag, and the
// number of extracted images. Each wrapper is a JSON document whose "content"
// key (if present) is the value under test.
func TestConvertClaudeToolResultContent(t *testing.T) {
	type scenario struct {
		name       string
		wrapper    string
		wantResult string
		wantRaw    bool
		wantImages int
	}

	// Fields left out of a scenario take their zero value: empty result,
	// not raw, no images.
	scenarios := []scenario{
		{
			name:       "StringContent",
			wrapper:    `{"content":"alpha"}`,
			wantResult: "alpha",
		},
		{
			name:       "SingleTextBlock",
			wrapper:    `{"content":[{"type":"text","text":"alpha"}]}`,
			wantResult: `{"type":"text","text":"alpha"}`,
			wantRaw:    true,
		},
		{
			name:       "MultipleTextBlocks",
			wrapper:    `{"content":[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]}`,
			wantResult: `[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]`,
			wantRaw:    true,
		},
		{
			name:       "TextAndImage",
			wrapper:    `{"content":[{"type":"text","text":"alpha"},{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
			wantResult: `{"type":"text","text":"alpha"}`,
			wantRaw:    true,
			wantImages: 1,
		},
		{
			name:       "ImageOnly",
			wrapper:    `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
			wantImages: 1,
		},
		{
			name:    "ImageWithoutDataDropped",
			wrapper: `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png"}}]}`,
		},
		{
			name:       "ObjectContent",
			wrapper:    `{"content":{"foo":"bar"}}`,
			wantResult: `{"foo":"bar"}`,
			wantRaw:    true,
		},
		{
			name:       "ObjectImage",
			wrapper:    `{"content":{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}}`,
			wantImages: 1,
		},
		{
			name:    "AbsentContent",
			wrapper: `{}`,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			got := ConvertClaudeToolResultContent(gjson.Get(sc.wrapper, "content"))

			if got.Result != sc.wantResult {
				t.Errorf("Result = %q, want %q", got.Result, sc.wantResult)
			}
			if got.ResultIsRaw != sc.wantRaw {
				t.Errorf("ResultIsRaw = %v, want %v", got.ResultIsRaw, sc.wantRaw)
			}
			if imageCount := len(got.Images); imageCount != sc.wantImages {
				t.Errorf("len(Images) = %d, want %d", imageCount, sc.wantImages)
			}
		})
	}
}
func TestConvertClaudeToolResultContent_ImageFields(t *testing.T) {
content := gjson.Get(`{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`, "content")
got := ConvertClaudeToolResultContent(content)
if len(got.Images) != 1 {
t.Fatalf("expected 1 image, got %d", len(got.Images))
}
if got.Images[0].MimeType != "image/png" {
t.Errorf("MimeType = %q, want image/png", got.Images[0].MimeType)
}
if got.Images[0].Data != "aGVsbG8=" {
t.Errorf("Data = %q, want aGVsbG8=", got.Images[0].Data)
}
}