From 2406daf3ef7e07aa1fd4035b87b4566d8d12e717 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Tue, 16 Jun 2026 08:09:30 +0800 Subject: [PATCH] feat(util): normalize Claude tool_result content and improve Gemini integration - Added `ConvertClaudeToolResultContent` to standardize Claude tool_result content, preserving JSON structure and splitting out base64-encoded images. - Updated Gemini and Gemini-CLI translators to use the new utility for generating deterministic function responses and inline image parts. - Added comprehensive test cases for content types and edge cases, ensuring correct handling of string, JSON, and image blocks. Closes: #2781 --- .../claude/gemini-cli_claude_request.go | 14 ++- .../claude/gemini-cli_claude_request_test.go | 77 ++++++++++++ .../gemini/claude/gemini_claude_request.go | 14 ++- .../claude/gemini_claude_request_test.go | 77 ++++++++++++ internal/util/claude_tool_result.go | 109 +++++++++++++++++ internal/util/claude_tool_result_test.go | 110 ++++++++++++++++++ 6 files changed, 397 insertions(+), 4 deletions(-) create mode 100644 internal/util/claude_tool_result.go create mode 100644 internal/util/claude_tool_result_test.go diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 80e942118..5291df437 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -115,11 +115,21 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] if len(toolCallIDs) > 1 { funcName = strings.Join(toolCallIDs[0:len(toolCallIDs)-1], "-") } - responseData := contentResult.Get("content").Raw + toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content")) part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`) part, _ = sjson.SetBytes(part, "functionResponse.name", util.SanitizeFunctionName(funcName)) 
- part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData) + if toolResult.ResultIsRaw { + part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result)) + } else { + part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result) + } contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part) + for _, img := range toolResult.Images { + imagePart := []byte(`{"inlineData":{"mime_type":"","data":""}}`) + imagePart, _ = sjson.SetBytes(imagePart, "inlineData.mime_type", img.MimeType) + imagePart, _ = sjson.SetBytes(imagePart, "inlineData.data", img.Data) + contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart) + } case "image": source := contentResult.Get("source") diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request_test.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request_test.go index 50a491fd9..ea634205b 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request_test.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request_test.go @@ -107,3 +107,80 @@ func TestConvertClaudeRequestToCLI_ConvertsMessageSystemRoleToUserContent(t *tes t.Fatalf("Unexpected first system part: %q", got) } } + +func TestConvertClaudeRequestToCLI_StructuredToolResult(t *testing.T) { + inputJSON := []byte(`{ + "model": "gemini-3-flash-preview", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}} + ] + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "json-call-1", + "content": [ + {"type": "text", "text": "alpha"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}} + ] + } + ] + } + ] + }`) + + output := ConvertClaudeRequestToCLI("gemini-3-flash-preview", inputJSON, false) + + fr := gjson.GetBytes(output, 
"request.contents.1.parts.0.functionResponse") + if !fr.Exists() { + t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "request.contents").Raw) + } + // The text block must remain structured JSON, not a double-encoded string blob. + if got := fr.Get("response.result.text").String(); got != "alpha" { + t.Fatalf("expected structured result text 'alpha', got result=%s", fr.Get("response.result").Raw) + } + // The image block must be emitted as a separate inlineData part, not embedded in result. + img := gjson.GetBytes(output, "request.contents.1.parts.1.inlineData") + if got := img.Get("mime_type").String(); got != "image/png" { + t.Fatalf("expected image mime type 'image/png', got '%s'", got) + } + if got := img.Get("data").String(); got != "aGVsbG8=" { + t.Fatalf("expected image data 'aGVsbG8=', got '%s'", got) + } +} + +func TestConvertClaudeRequestToCLI_StringToolResult(t *testing.T) { + inputJSON := []byte(`{ + "model": "gemini-3-flash-preview", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"} + ] + } + ] + }`) + + output := ConvertClaudeRequestToCLI("gemini-3-flash-preview", inputJSON, false) + + fr := gjson.GetBytes(output, "request.contents.1.parts.0.functionResponse") + if !fr.Exists() { + t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "request.contents").Raw) + } + // String content must not be double-encoded: result should be exactly "alpha". 
+ if got := fr.Get("response.result").String(); got != "alpha" { + t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", got, fr.Get("response.result").Raw) + } +} diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index 3347eaec1..96d04a18e 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -119,11 +119,21 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) funcName = toolCallID } funcName = util.SanitizeFunctionName(funcName) - responseData := contentResult.Get("content").Raw + toolResult := util.ConvertClaudeToolResultContent(contentResult.Get("content")) part := []byte(`{"functionResponse":{"name":"","response":{"result":""}}}`) part, _ = sjson.SetBytes(part, "functionResponse.name", funcName) - part, _ = sjson.SetBytes(part, "functionResponse.response.result", responseData) + if toolResult.ResultIsRaw { + part, _ = sjson.SetRawBytes(part, "functionResponse.response.result", []byte(toolResult.Result)) + } else { + part, _ = sjson.SetBytes(part, "functionResponse.response.result", toolResult.Result) + } contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", part) + for _, img := range toolResult.Images { + imagePart := []byte(`{"inline_data":{"mime_type":"","data":""}}`) + imagePart, _ = sjson.SetBytes(imagePart, "inline_data.mime_type", img.MimeType) + imagePart, _ = sjson.SetBytes(imagePart, "inline_data.data", img.Data) + contentJSON, _ = sjson.SetRawBytes(contentJSON, "parts.-1", imagePart) + } case "image": source := contentResult.Get("source") diff --git a/internal/translator/gemini/claude/gemini_claude_request_test.go b/internal/translator/gemini/claude/gemini_claude_request_test.go index 81b06214e..f40708b59 100644 --- a/internal/translator/gemini/claude/gemini_claude_request_test.go +++ 
b/internal/translator/gemini/claude/gemini_claude_request_test.go @@ -178,3 +178,80 @@ func TestConvertClaudeRequestToGemini_SkipsEmptyTextParts(t *testing.T) { t.Fatalf("Expected part text 'hello', got '%s'", got) } } + +func TestConvertClaudeRequestToGemini_StructuredToolResult(t *testing.T) { + inputJSON := []byte(`{ + "model": "gemini-3-flash-preview", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}} + ] + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "json-call-1", + "content": [ + {"type": "text", "text": "alpha"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "aGVsbG8="}} + ] + } + ] + } + ] + }`) + + output := ConvertClaudeRequestToGemini("gemini-3-flash-preview", inputJSON, false) + + fr := gjson.GetBytes(output, "contents.1.parts.0.functionResponse") + if !fr.Exists() { + t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "contents").Raw) + } + // The text block must remain structured JSON, not a double-encoded string blob. + if got := fr.Get("response.result.text").String(); got != "alpha" { + t.Fatalf("expected structured result text 'alpha', got result=%s", fr.Get("response.result").Raw) + } + // The image block must be emitted as a separate inline_data part, not embedded in result. 
+ img := gjson.GetBytes(output, "contents.1.parts.1.inline_data") + if got := img.Get("mime_type").String(); got != "image/png" { + t.Fatalf("expected image mime type 'image/png', got '%s'", got) + } + if got := img.Get("data").String(); got != "aGVsbG8=" { + t.Fatalf("expected image data 'aGVsbG8=', got '%s'", got) + } +} + +func TestConvertClaudeRequestToGemini_StringToolResult(t *testing.T) { + inputJSON := []byte(`{ + "model": "gemini-3-flash-preview", + "messages": [ + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "json-call-1", "name": "json", "input": {"ok": true}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "json-call-1", "content": "alpha"} + ] + } + ] + }`) + + output := ConvertClaudeRequestToGemini("gemini-3-flash-preview", inputJSON, false) + + fr := gjson.GetBytes(output, "contents.1.parts.0.functionResponse") + if !fr.Exists() { + t.Fatalf("expected functionResponse part, contents=%s", gjson.GetBytes(output, "contents").Raw) + } + // String content must not be double-encoded: result should be exactly "alpha". + if got := fr.Get("response.result").String(); got != "alpha" { + t.Fatalf("expected result 'alpha', got '%s' (raw=%s)", got, fr.Get("response.result").Raw) + } +} diff --git a/internal/util/claude_tool_result.go b/internal/util/claude_tool_result.go new file mode 100644 index 000000000..585548535 --- /dev/null +++ b/internal/util/claude_tool_result.go @@ -0,0 +1,109 @@ +package util + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// ClaudeToolResultImage represents a base64-encoded image extracted from a Claude +// tool_result content block. Callers emit it as a provider-specific inline data +// part so that image bytes do not bloat the textual function response result. 
+type ClaudeToolResultImage struct {
+	MimeType string // copied from the block's source.media_type; empty if the block omits it
+	Data     string // copied from the block's source.data; claudeImageFromBlock rejects empty data
+}
+
+// ClaudeToolResult is the normalized form of a Claude tool_result `content` field,
+// ready to be written into a Gemini-style functionResponse.
+type ClaudeToolResult struct {
+	// Result is the value for functionResponse.response.result; empty when nothing but images remains.
+	Result string
+	// ResultIsRaw reports whether Result holds raw JSON (write with sjson.SetRaw*)
+	// or a plain string (write with sjson.Set*). Writing raw JSON text through
+	// sjson.Set as a string value would double-encode it, so callers must honor
+	// this flag.
+	ResultIsRaw bool
+	// Images holds base64 image blocks separated out of the content, in content order; nil if none.
+	Images []ClaudeToolResultImage
+}
+
+// ConvertClaudeToolResultContent normalizes a Claude tool_result `content` field into
+// a deterministic Gemini functionResponse result plus any extracted images.
+//
+// Claude tool_result content may be a plain string, an array of mixed text/image blocks,
+// a single object, or absent. Some Claude->Gemini translators previously wrote content.Raw
+// straight through sjson.SetBytes, which double-encoded string content and flattened
+// structured arrays (including base64 image data) into one opaque escaped string. This helper
+// mirrors the Antigravity Claude translator, which already handles structured content correctly:
+//
+//   - string             -> plain string result (no double-encoding)
+//   - single non-image   -> raw JSON result (structure preserved)
+//   - multiple non-image -> raw JSON array result
+//   - base64 image block -> separated into Images (emitted as inline data parts)
+//   - object             -> raw JSON result, or image -> Images with empty result
+//   - number/bool/null   -> raw JSON result
+//   - absent/empty       -> empty string result
+//
+// Unlike Antigravity, image blocks without base64 data are dropped rather than
+// emitted as empty inline data parts, matching the Gemini image part guards.
+func ConvertClaudeToolResultContent(content gjson.Result) ClaudeToolResult {
+	if content.Type == gjson.String {
+		return ClaudeToolResult{Result: content.String()}
+	}
+	if content.IsArray() {
+		// Split the blocks: base64 images go to Images, everything else keeps its raw JSON.
+		var out ClaudeToolResult
+		var rawBlocks []string
+		content.ForEach(func(_, item gjson.Result) bool {
+			if !isClaudeBase64Image(item) {
+				rawBlocks = append(rawBlocks, item.Raw)
+			} else if image, found := claudeImageFromBlock(item); found {
+				out.Images = append(out.Images, image)
+			}
+			return true
+		})
+		// One block is unwrapped, several are re-assembled into an array, none leaves Result empty.
+		if n := len(rawBlocks); n == 1 {
+			out.Result, out.ResultIsRaw = rawBlocks[0], true
+		} else if n > 1 {
+			merged := []byte(`[]`)
+			for _, raw := range rawBlocks {
+				merged, _ = sjson.SetRawBytes(merged, "-1", []byte(raw)) // "-1" appends
+			}
+			out.Result, out.ResultIsRaw = string(merged), true
+		}
+		return out
+	}
+	if content.IsObject() {
+		if !isClaudeBase64Image(content) {
+			return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
+		}
+		if image, found := claudeImageFromBlock(content); found {
+			return ClaudeToolResult{Images: []ClaudeToolResultImage{image}}
+		}
+		return ClaudeToolResult{}
+	}
+	if content.Raw == "" {
+		return ClaudeToolResult{}
+	}
+	// Any other JSON value (number, boolean, null) is forwarded as raw JSON.
+	return ClaudeToolResult{Result: content.Raw, ResultIsRaw: true}
+}
+
+// isClaudeBase64Image reports whether block has type "image" and a source of type "base64".
+func isClaudeBase64Image(block gjson.Result) bool {
+	return block.Get("source.type").String() == "base64" && block.Get("type").String() == "image"
+}
+
+// claudeImageFromBlock extracts image data from a base64 image block. It returns false
+// when the block carries no base64 data, so empty inline data parts are not emitted.
+func claudeImageFromBlock(block gjson.Result) (ClaudeToolResultImage, bool) {
+	var img ClaudeToolResultImage
+	// A block with no payload is reported as missing rather than returned half-filled.
+	if img.Data = block.Get("source.data").String(); img.Data == "" {
+		return ClaudeToolResultImage{}, false
+	}
+	// The media type is copied verbatim and is not validated here.
+	img.MimeType = block.Get("source.media_type").String()
+	return img, true
+}
diff --git a/internal/util/claude_tool_result_test.go b/internal/util/claude_tool_result_test.go
new file mode 100644
index 000000000..6ac24081b
--- /dev/null
+++ b/internal/util/claude_tool_result_test.go
@@ -0,0 +1,110 @@
+package util
+
+import (
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+func TestConvertClaudeToolResultContent(t *testing.T) {
+	cases := []struct {
+		name       string
+		payload    string
+		wantResult string
+		wantRaw    bool
+		wantImages int
+	}{
+		{
+			name:       "StringContent",
+			payload:    `{"content":"alpha"}`,
+			wantResult: "alpha",
+			wantRaw:    false,
+			wantImages: 0,
+		},
+		{
+			name:       "SingleTextBlock",
+			payload:    `{"content":[{"type":"text","text":"alpha"}]}`,
+			wantResult: `{"type":"text","text":"alpha"}`,
+			wantRaw:    true,
+			wantImages: 0,
+		},
+		{
+			name:       "MultipleTextBlocks",
+			payload:    `{"content":[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]}`,
+			wantResult: `[{"type":"text","text":"alpha"},{"type":"text","text":"beta"}]`,
+			wantRaw:    true,
+			wantImages: 0,
+		},
+		{
+			name:       "TextAndImage",
+			payload:    `{"content":[{"type":"text","text":"alpha"},{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
+			wantResult: `{"type":"text","text":"alpha"}`,
+			wantRaw:    true,
+			wantImages: 1,
+		},
+		{
+			name:       "ImageOnly",
+			payload:    `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`,
+			wantResult: "",
+			wantRaw:    false,
+			wantImages: 1,
+		},
+		{
+			name:       "ImageWithoutDataDropped",
+			payload:    `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png"}}]}`,
+			wantResult: "",
+			wantRaw:    false,
+			wantImages: 0,
+		},
+		{
+			name:       "ObjectContent",
+			payload:    `{"content":{"foo":"bar"}}`,
+			wantResult: `{"foo":"bar"}`,
+			wantRaw:    true,
+			wantImages: 0,
+		},
+		{
+			name:       "ObjectImage",
+			payload:    `{"content":{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}}`,
+			wantResult: "",
+			wantRaw:    false,
+			wantImages: 1,
+		},
+		{
+			name:       "AbsentContent",
+			payload:    `{}`,
+			wantResult: "",
+			wantRaw:    false,
+			wantImages: 0,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			res := ConvertClaudeToolResultContent(gjson.Get(tc.payload, "content"))
+			if res.Result != tc.wantResult {
+				t.Errorf("Result = %q, want %q", res.Result, tc.wantResult)
+			}
+			if res.ResultIsRaw != tc.wantRaw {
+				t.Errorf("ResultIsRaw = %v, want %v", res.ResultIsRaw, tc.wantRaw)
+			}
+			if len(res.Images) != tc.wantImages {
+				t.Errorf("len(Images) = %d, want %d", len(res.Images), tc.wantImages)
+			}
+		})
+	}
+}
+
+func TestConvertClaudeToolResultContent_ImageFields(t *testing.T) {
+	const payload = `{"content":[{"type":"image","source":{"type":"base64","media_type":"image/png","data":"aGVsbG8="}}]}`
+	images := ConvertClaudeToolResultContent(gjson.Get(payload, "content")).Images
+	if len(images) != 1 {
+		t.Fatalf("expected 1 image, got %d", len(images))
+	}
+	if mime := images[0].MimeType; mime != "image/png" {
+		t.Errorf("MimeType = %q, want image/png", mime)
+	}
+	if data := images[0].Data; data != "aGVsbG8=" {
+		t.Errorf("Data = %q, want aGVsbG8=", data)
+	}
+}