From 53a21dfb0b5ce3711db7adf82d603f664dd29dfa Mon Sep 17 00:00:00 2001 From: sususu98 <33882693+sususu98@users.noreply.github.com> Date: Tue, 23 Jun 2026 14:33:27 +0800 Subject: [PATCH] [codex] Drop foreign encrypted_content before xAI Grok upstream (#3961) * Drop foreign encrypted_content before xAI Grok upstream xAI Grok accepts provider-native encrypted_content as opaque replay state, but GPT/Codex reasoning signatures, Gemini thoughtSignature blobs, and Claude thinking signatures can all travel through OpenAI Responses-style reasoning.encrypted_content while remaining incompatible with xAI. Forwarding those foreign blobs to Grok causes upstream validation failures, especially when the foreign value is high-entropy enough to look ciphertext-like. Add a Grok encrypted_content transport validator that stays conservative and shape-oriented: - require unpadded standard base64 with no foreign characters - reject obvious GPT/Codex gAAAA reasoning signatures before decode - reject strict Claude thinking signatures in both official E-form and Antigravity R-form - reject known Gemini thoughtSignature envelopes by reusing the central Gemini validator, covering Gemini 2.5 field-1 and Gemini 3.x field-2 shapes - require decoded payloads to be long enough and high-entropy enough to look like native Grok ciphertext - avoid decrypting, protobuf-parsing, or otherwise interpreting native Grok payloads on the hot path Wire the validator into the xAI Responses request preparation path for reasoning and compaction input items. For reasoning items, an invalid encrypted_content field is deleted before the request is sent upstream while the surrounding item is preserved; a compaction item whose encrypted_content is invalid is dropped from the input entirely. Debug logging records only redacted metadata (drop count, first item type, and first rejection reason). Extend coverage with native Grok corpus preservation, Gemini field-1/field-2 rejection, Claude E-form and R-form rejection, invalid-blob sanitizer tests, and compact/websocket replay preservation. 
The foreign-provider checks are deliberately narrow so high-entropy Grok blobs are not rejected merely because they look random. * fix(xai): harden encrypted content sanitizer --- internal/runtime/executor/xai_executor.go | 87 ++++++ .../runtime/executor/xai_executor_test.go | 128 ++++++++- .../executor/xai_websockets_executor_test.go | 7 +- internal/signature/gemini_validation_test.go | 41 +++ internal/signature/grok_validation.go | 123 +++++++++ internal/signature/grok_validation_test.go | 255 ++++++++++++++++++ 6 files changed, 637 insertions(+), 4 deletions(-) create mode 100644 internal/signature/grok_validation.go create mode 100644 internal/signature/grok_validation_test.go diff --git a/internal/runtime/executor/xai_executor.go b/internal/runtime/executor/xai_executor.go index d488ae104..8b21f93d4 100644 --- a/internal/runtime/executor/xai_executor.go +++ b/internal/runtime/executor/xai_executor.go @@ -17,6 +17,7 @@ import ( xaiauth "github.com/router-for-me/CLIProxyAPI/v7/internal/auth/xai" "github.com/router-for-me/CLIProxyAPI/v7/internal/config" "github.com/router-for-me/CLIProxyAPI/v7/internal/runtime/executor/helps" + "github.com/router-for-me/CLIProxyAPI/v7/internal/signature" "github.com/router-for-me/CLIProxyAPI/v7/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v7/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/auth" @@ -835,6 +836,7 @@ func (e *XAIExecutor) prepareResponsesRequestTo(ctx context.Context, req cliprox body = normalizeXAITools(body) body = normalizeXAIToolChoiceForTools(body) body = normalizeXAIInputReasoningItems(body) + body = sanitizeXAIInputEncryptedContent(body) body = normalizeCodexInstructions(body) body = sanitizeXAIResponsesBody(body, baseModel) @@ -1009,6 +1011,9 @@ func sanitizeXAIResponsesBody(body []byte, model string) []byte { body = removeXAIEncryptedReasoningInclude(body) if !xaiSupportsReasoningEffort(model) { body, _ = sjson.DeleteBytes(body, "reasoning.effort") + if 
reasoning := gjson.GetBytes(body, "reasoning"); reasoning.Exists() && reasoning.IsObject() && len(reasoning.Map()) == 0 { + body, _ = sjson.DeleteBytes(body, "reasoning") + } } return body } @@ -1128,6 +1133,88 @@ func normalizeXAITool(tool gjson.Result) ([]byte, bool, bool) { return raw, changed, true } +func sanitizeXAIInputEncryptedContent(body []byte) []byte { + input := gjson.GetBytes(body, "input") + if !input.Exists() || !input.IsArray() { + return body + } + items := make([]json.RawMessage, 0, len(input.Array())) + changed := false + dropCount := 0 + firstReason := "" + firstItemType := "" + for _, item := range input.Array() { + itemType := strings.TrimSpace(item.Get("type").String()) + if itemType != "reasoning" && itemType != "compaction" { + items = append(items, json.RawMessage(item.Raw)) + continue + } + encryptedContent := item.Get("encrypted_content") + if !encryptedContent.Exists() { + items = append(items, json.RawMessage(item.Raw)) + continue + } + reason := "" + switch encryptedContent.Type { + case gjson.String: + if _, err := signature.InspectGrokEncryptedContent(encryptedContent.String()); err != nil { + reason = err.Error() + } + case gjson.Null: + reason = "encrypted_content is null" + default: + reason = fmt.Sprintf("encrypted_content must be a string, got %s", encryptedContent.Type.String()) + } + if reason == "" { + items = append(items, json.RawMessage(item.Raw)) + continue + } + + if itemType == "compaction" { + changed = true + dropCount++ + if firstReason == "" { + firstReason = reason + firstItemType = itemType + } + continue + } + + next, err := sjson.DeleteBytes([]byte(item.Raw), "encrypted_content") + if err != nil { + items = append(items, json.RawMessage(item.Raw)) + continue + } + items = append(items, json.RawMessage(next)) + changed = true + dropCount++ + if firstReason == "" { + firstReason = reason + firstItemType = itemType + } + } + if !changed { + return body + } + rawInput, err := json.Marshal(items) + if err != nil { 
+ return body + } + updated, err := sjson.SetRawBytes(body, "input", rawInput) + if err != nil { + return body + } + if dropCount > 0 { + log.WithFields(log.Fields{ + "component": "xai_encrypted_content_sanitizer", + "dropped": dropCount, + "first_item_type": firstItemType, + "first_reason": firstReason, + }).Debug("xai executor: removed invalid encrypted_content before upstream") + } + return mergeAdjacentXAIInputReasoningSummaries(updated) +} + func normalizeXAIInputReasoningItems(body []byte) []byte { input := gjson.GetBytes(body, "input") if !input.Exists() || !input.IsArray() { diff --git a/internal/runtime/executor/xai_executor_test.go b/internal/runtime/executor/xai_executor_test.go index e674b5cf1..b360dfbd9 100644 --- a/internal/runtime/executor/xai_executor_test.go +++ b/internal/runtime/executor/xai_executor_test.go @@ -3,6 +3,8 @@ package executor import ( "bytes" "context" + "crypto/sha256" + "encoding/base64" "io" "net/http" "net/http/httptest" @@ -16,6 +18,7 @@ import ( cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v7/sdk/translator" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) func TestXAIExecutorExecuteShapesResponsesRequest(t *testing.T) { @@ -255,6 +258,7 @@ func TestXAIExecutorComposerSessionIsolation(t *testing.T) { } func TestXAIExecutorCompactUsesCompactEndpoint(t *testing.T) { + validEncryptedContent := testValidGrokEncryptedContent() var gotPath string var gotAuth string var gotAccept string @@ -283,9 +287,11 @@ func TestXAIExecutorCompactUsesCompactEndpoint(t *testing.T) { }, } + payload := []byte(`{"model":"grok-4.3","stream":true,"input":[{"type":"compaction","encrypted_content":""},{"role":"user","content":"hello"}]}`) + payload, _ = sjson.SetBytes(payload, "input.0.encrypted_content", validEncryptedContent) resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ Model: "grok-4.3", - Payload: 
[]byte(`{"model":"grok-4.3","stream":true,"input":[{"type":"compaction","encrypted_content":"opaque-in"},{"role":"user","content":"hello"}]}`), + Payload: payload, }, cliproxyexecutor.Options{ SourceFormat: sdktranslator.FormatOpenAIResponse, Alt: "responses/compact", @@ -306,8 +312,8 @@ func TestXAIExecutorCompactUsesCompactEndpoint(t *testing.T) { if gjson.GetBytes(gotBody, "stream").Exists() { t.Fatalf("stream exists in compact body: %s", string(gotBody)) } - if got := gjson.GetBytes(gotBody, "input.0.encrypted_content").String(); got != "opaque-in" { - t.Fatalf("input.0.encrypted_content = %q, want opaque-in; body=%s", got, string(gotBody)) + if got := gjson.GetBytes(gotBody, "input.0.encrypted_content").String(); got != validEncryptedContent { + t.Fatalf("input.0.encrypted_content = %q, want valid sample; body=%s", got, string(gotBody)) } if string(resp.Payload) != `{"id":"resp_1","object":"response.compaction","output":[{"type":"compaction","encrypted_content":"opaque-out"}],"usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}` { t.Fatalf("payload = %s", string(resp.Payload)) @@ -1025,3 +1031,119 @@ func TestXAIExecutorComposerReusesClaudeCodeSession(t *testing.T) { t.Fatalf("x-grok-conv-id = %q, want %q", got, firstKey) } } + +func TestSanitizeXAIInputEncryptedContent_DropsInvalidReasoningBlob(t *testing.T) { + body := []byte(`{"model":"grok-4.3","input":[{"type":"reasoning","summary":[],"encrypted_content":"bad"},{"type":"reasoning","summary":[],"encrypted_content":"gAAAAABinvalid-gpt-shape"},{"role":"user","content":"hi"}]}`) + got := sanitizeXAIInputEncryptedContent(body) + if gjson.GetBytes(got, "input.0.encrypted_content").Exists() || gjson.GetBytes(got, "input.1.encrypted_content").Exists() { + t.Fatalf("invalid encrypted_content should be removed: %s", string(got)) + } +} + +func TestSanitizeXAIInputEncryptedContent_PreservesValidBlob(t *testing.T) { + sample := testValidGrokEncryptedContent() + body := 
[]byte(`{"model":"grok-4.3","input":[{"type":"reasoning","summary":[],"encrypted_content":""}]}`) + body, _ = sjson.SetBytes(body, "input.0.encrypted_content", sample) + got := sanitizeXAIInputEncryptedContent(body) + if gotEnc := gjson.GetBytes(got, "input.0.encrypted_content").String(); gotEnc != sample { + t.Fatalf("valid encrypted_content should be preserved, got %q", gotEnc) + } +} + +func TestXAIExecutorReMergesReasoningAfterDroppingInvalidEncryptedContent(t *testing.T) { + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, errRead := io.ReadAll(r.Body) + if errRead != nil { + t.Fatalf("read body: %v", errRead) + } + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"model\":\"grok-4.3\",\"output\":[],\"usage\":{\"input_tokens\":1,\"output_tokens\":1,\"total_tokens\":2}}}\n\n")) + })) + defer server.Close() + + exec := NewXAIExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + Provider: "xai", + Attributes: map[string]string{"base_url": server.URL}, + Metadata: map[string]any{"access_token": "xai-token"}, + } + + _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "grok-4.3", + Payload: []byte(`{"model":"grok-4.3","input":[` + + `{"type":"reasoning","summary":[{"type":"summary_text","text":"first"}]},` + + `{"type":"reasoning","summary":[{"type":"summary_text","text":"second"}],"encrypted_content":"gAAAAABforeign-codex-replay"},` + + `{"role":"user","content":"hi"}` + + `]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatOpenAIResponse, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + + if got := gjson.GetBytes(gotBody, "input.0.summary.0.text").String(); got != "first" { + t.Fatalf("input.0.summary.0.text = %q, want first; 
body=%s", got, string(gotBody)) + } + if got := gjson.GetBytes(gotBody, "input.0.summary.1.text").String(); got != "second" { + t.Fatalf("input.0.summary.1.text = %q, want second; body=%s", got, string(gotBody)) + } + if got := gjson.GetBytes(gotBody, "input.1.role").String(); got != "user" { + t.Fatalf("input.1.role = %q, want user; body=%s", got, string(gotBody)) + } + if gjson.GetBytes(gotBody, "input.2").Exists() { + t.Fatalf("input.2 exists, want invalid reasoning blob removed and summaries re-merged; body=%s", string(gotBody)) + } +} + +func TestXAIExecutorDropsInvalidCompactionItem(t *testing.T) { + var gotBody []byte + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, errRead := io.ReadAll(r.Body) + if errRead != nil { + t.Fatalf("read body: %v", errRead) + } + gotBody = body + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_1\",\"object\":\"response\",\"created_at\":0,\"status\":\"completed\",\"model\":\"grok-4.3\",\"output\":[],\"usage\":{\"input_tokens\":1,\"output_tokens\":1,\"total_tokens\":2}}}\n\n")) + })) + defer server.Close() + + exec := NewXAIExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + Provider: "xai", + Attributes: map[string]string{"base_url": server.URL}, + Metadata: map[string]any{"access_token": "xai-token"}, + } + + _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "grok-4.3", + Payload: []byte(`{"model":"grok-4.3","input":[{"type":"compaction","encrypted_content":"gAAAAABforeign-codex-replay"},{"role":"user","content":"hi"}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatOpenAIResponse, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + + if xaiInputHasItemType(gotBody, "compaction") { + t.Fatalf("invalid compaction item reached upstream body: %s", string(gotBody)) + } + if got := gjson.GetBytes(gotBody, 
"input.0.role").String(); got != "user" { + t.Fatalf("input.0.role = %q, want user after dropping invalid compaction; body=%s", got, string(gotBody)) + } + if gjson.GetBytes(gotBody, "input.1").Exists() { + t.Fatalf("input.1 exists, want only user item after dropping invalid compaction; body=%s", string(gotBody)) + } +} + +func testValidGrokEncryptedContent() string { + buf := make([]byte, 0, 256) + for i := 0; len(buf) < 256; i++ { + sum := sha256.Sum256([]byte{byte(i), byte(i >> 8), byte(i >> 16)}) + buf = append(buf, sum[:]...) + } + return base64.RawStdEncoding.EncodeToString(buf[:256]) +} diff --git a/internal/runtime/executor/xai_websockets_executor_test.go b/internal/runtime/executor/xai_websockets_executor_test.go index 4a8bc31dc..a3a717e6a 100644 --- a/internal/runtime/executor/xai_websockets_executor_test.go +++ b/internal/runtime/executor/xai_websockets_executor_test.go @@ -331,9 +331,11 @@ func TestXAIWebsocketsExecuteStreamRewritesRepeatedResponseIDForDownstream(t *te } func TestXAIWebsocketsExecuteStreamCompactionTriggerUsesHTTPCompactWithRecordedContext(t *testing.T) { + nativeEncryptedContent := testValidGrokEncryptedContent() upgrader := websocket.Upgrader{CheckOrigin: func(*http.Request) bool { return true }} capturedWebsocketPayload := make(chan []byte, 1) capturedCompactPayload := make(chan []byte, 1) + compactResponse := []byte(fmt.Sprintf(`{"id":"resp_compact","model":"grok-4.3","output":[{"type":"compaction","encrypted_content":%q}],"usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`, nativeEncryptedContent)) server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case "/responses": @@ -369,7 +371,7 @@ func TestXAIWebsocketsExecuteStreamCompactionTriggerUsesHTTPCompactWithRecordedC } capturedCompactPayload <- bytes.Clone(body) w.Header().Set("Content-Type", "application/json") - _, _ = 
w.Write([]byte(`{"id":"resp_compact","model":"grok-4.3","output":[{"type":"compaction","encrypted_content":"opaque"}],"usage":{"input_tokens":1,"output_tokens":2,"total_tokens":3}}`)) + _, _ = w.Write(compactResponse) default: t.Errorf("path = %q, want /responses", r.URL.Path) http.Error(w, "unexpected path", http.StatusNotFound) @@ -486,6 +488,9 @@ func TestXAIWebsocketsExecuteStreamCompactionTriggerUsesHTTPCompactWithRecordedC if got := input.Array()[0].Get("type").String(); got != "compaction" { t.Fatalf("post-compaction input[0].type = %q, want compaction; payload=%s", got, payload) } + if got := input.Array()[0].Get("encrypted_content").String(); got != nativeEncryptedContent { + t.Fatalf("post-compaction input[0].encrypted_content = %q, want native sample; payload=%s", got, payload) + } if got := input.Array()[1].Get("id").String(); got != "msg-2" { t.Fatalf("post-compaction input[1].id = %q, want msg-2; payload=%s", got, payload) } diff --git a/internal/signature/gemini_validation_test.go b/internal/signature/gemini_validation_test.go index add57a6b3..0a1023e4c 100644 --- a/internal/signature/gemini_validation_test.go +++ b/internal/signature/gemini_validation_test.go @@ -2,6 +2,10 @@ package signature import ( "encoding/base64" + "encoding/json" + "os" + "path/filepath" + "runtime" "strings" "testing" @@ -391,3 +395,40 @@ func TestValidateGeminiFunctionCallPairing_RejectsSameContentInterleaving(t *tes t.Fatalf("unexpected error: %v", err) } } + +func TestIsValidGeminiThoughtSignature_AgyNativeSamples(t *testing.T) { + samplesPath, ok := agyGeminiThoughtSignatureSamplesPath() + if !ok { + t.Skip("agy gemini corpus missing; run docs/native-prompt-capture/scripts/harvest_agy_gemini_signatures.py") + } + raw, err := os.ReadFile(samplesPath) + if err != nil { + t.Fatalf("read samples: %v", err) + } + var samples []string + if err := json.Unmarshal(raw, &samples); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if len(samples) < 10 { + t.Fatalf("expected >=10 
agy gemini thoughtSignature samples, got %d", len(samples)) + } + opts := GeminiThoughtSignatureValidationOptions{RequireKnownEnvelope: false} // agy native mix includes envelopes CPA may still transport-reject separately + for i, sig := range samples { + if !IsValidGeminiThoughtSignature(sig, opts) { + t.Fatalf("sample %d invalid (len=%d prefix=%q)", i, len(sig), sig[:12]) + } + } +} + +func agyGeminiThoughtSignatureSamplesPath() (string, bool) { + _, file, _, ok := runtime.Caller(0) + if !ok { + return "", false + } + repo := filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..")) + path := filepath.Join(repo, "docs", "native-prompt-capture", "corpus", "agy-gemini-thought-signatures", "samples.json") + if _, err := os.Stat(path); err != nil { + return path, false + } + return path, true +} diff --git a/internal/signature/grok_validation.go b/internal/signature/grok_validation.go new file mode 100644 index 000000000..7b9966f96 --- /dev/null +++ b/internal/signature/grok_validation.go @@ -0,0 +1,123 @@ +package signature + +import ( + "encoding/base64" + "fmt" + "math" + "strings" +) + +const ( + // MaxGrokEncryptedContentLen is a transport safety cap for opaque replay blobs. + MaxGrokEncryptedContentLen = 8 * 1024 * 1024 + // MinGrokEncryptedContentDecodedLen is derived from native Grok CLI captures; + // shorter decoded payloads are treated as invalid replay state for xAI upstream. + MinGrokEncryptedContentDecodedLen = 50 + // MinGrokEncryptedContentEntropyRatio rejects obvious non-ciphertext payloads. + // Native samples are >= 0.892 against the sample-size entropy ceiling. + MinGrokEncryptedContentEntropyRatio = 0.85 +) + +type GrokEncryptedContentInfo struct { + RawLen int + DecodedLen int +} + +// InspectGrokEncryptedContent validates the transport shape of xAI/Grok +// reasoning or compaction encrypted_content. This does not prove decryptability. 
+func InspectGrokEncryptedContent(raw string) (*GrokEncryptedContentInfo, error) { + sig := strings.TrimSpace(raw) + if sig == "" { + return nil, fmt.Errorf("empty Grok encrypted_content") + } + if len(sig) > MaxGrokEncryptedContentLen { + return nil, fmt.Errorf("Grok encrypted_content exceeds maximum length (%d bytes)", MaxGrokEncryptedContentLen) + } + if sig != raw { + return nil, fmt.Errorf("Grok encrypted_content has leading or trailing whitespace") + } + if strings.HasPrefix(sig, "gAAAA") { + return nil, fmt.Errorf("Grok encrypted_content looks like GPT/Codex reasoning signature") + } + if strings.Contains(sig, "=") { + return nil, fmt.Errorf("invalid Grok encrypted_content: expected unpadded standard base64") + } + if index, r, ok := firstInvalidGrokEncryptedContentChar(sig); ok { + return nil, fmt.Errorf("invalid Grok encrypted_content: contains non-base64 character U+%04X at byte %d", r, index) + } + if IsValidClaudeThinkingSignature(sig, ClaudeSignatureValidationOptions{Strict: true}) { + return nil, fmt.Errorf("Grok encrypted_content looks like Claude thinking signature") + } + if _, err := InspectGeminiThoughtSignature(sig, GeminiThoughtSignatureValidationOptions{RequireKnownEnvelope: true}); err == nil { + return nil, fmt.Errorf("Grok encrypted_content looks like Gemini thoughtSignature") + } + + decoded, err := decodeGrokEncryptedContent(sig) + if err != nil { + return nil, err + } + if len(decoded) < MinGrokEncryptedContentDecodedLen { + return nil, fmt.Errorf("invalid Grok encrypted_content: decoded payload too short (%d bytes)", len(decoded)) + } + if entropyRatio := byteEntropyRatio(decoded); entropyRatio < MinGrokEncryptedContentEntropyRatio { + return nil, fmt.Errorf("invalid Grok encrypted_content: decoded payload entropy ratio %.3f below %.3f", entropyRatio, MinGrokEncryptedContentEntropyRatio) + } + return &GrokEncryptedContentInfo{ + RawLen: len(sig), + DecodedLen: len(decoded), + }, nil +} + +func IsValidGrokEncryptedContent(raw string) 
bool { + _, err := InspectGrokEncryptedContent(raw) + return err == nil +} + +func decodeGrokEncryptedContent(sig string) ([]byte, error) { + decoded, err := base64.RawStdEncoding.DecodeString(sig) + if err != nil { + return nil, fmt.Errorf("invalid Grok encrypted_content: base64 decode failed: %w", err) + } + return decoded, nil +} + +func firstInvalidGrokEncryptedContentChar(sig string) (int, rune, bool) { + for index, r := range sig { + switch { + case r >= 'A' && r <= 'Z': + case r >= 'a' && r <= 'z': + case r >= '0' && r <= '9': + case r == '+' || r == '/': + default: + return index, r, true + } + } + return 0, 0, false +} + +func byteEntropyRatio(buf []byte) float64 { + if len(buf) == 0 { + return 0 + } + var counts [256]int + for _, b := range buf { + counts[b]++ + } + n := float64(len(buf)) + entropy := 0.0 + for _, count := range counts { + if count == 0 { + continue + } + p := float64(count) / n + entropy -= p * math.Log2(p) + } + maxSymbols := len(buf) + if maxSymbols > 256 { + maxSymbols = 256 + } + if maxSymbols <= 1 { + return 0 + } + return entropy / math.Log2(float64(maxSymbols)) +} diff --git a/internal/signature/grok_validation_test.go b/internal/signature/grok_validation_test.go new file mode 100644 index 000000000..69deac2f6 --- /dev/null +++ b/internal/signature/grok_validation_test.go @@ -0,0 +1,255 @@ +package signature + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "google.golang.org/protobuf/encoding/protowire" +) + +func TestInspectGrokEncryptedContent_NativeSamples(t *testing.T) { + path, ok := grokEncryptedContentSamplesPath() + if !ok { + t.Skip("grok encrypted_content corpus missing; run docs/native-prompt-capture/scripts/harvest-grok-encrypted-content.sh") + } + raw, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read samples: %v", err) + } + var samples []string + if err := json.Unmarshal(raw, &samples); err != nil { + t.Fatalf("unmarshal samples: 
%v", err) + } + if len(samples) == 0 { + t.Fatal("expected native Grok encrypted_content samples") + } + for i, sample := range samples { + if _, err := InspectGrokEncryptedContent(sample); err != nil { + t.Fatalf("sample[%d] should be valid, got %v", i, err) + } + } +} + +func TestInspectGrokEncryptedContent_RejectsAgyGeminiThoughtSignatures(t *testing.T) { + _, file, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("runtime.Caller failed") + } + path := filepath.Join(filepath.Dir(file), "testdata", "agy_gemini_thought_signature_entries.json") + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("agy gemini corpus missing; run harvest_agy_gemini_signatures.py") + } else if err != nil { + t.Fatalf("stat samples: %v", err) + } + raw, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read samples: %v", err) + } + var entries []struct { + ThoughtSignature string `json:"thoughtSignature"` + } + if err := json.Unmarshal(raw, &entries); err != nil { + t.Fatalf("unmarshal samples: %v", err) + } + if len(entries) == 0 { + t.Fatal("expected agy Gemini thought signatures") + } + checkedUnpaddedGemini := false + for i, entry := range entries { + _, err := InspectGrokEncryptedContent(entry.ThoughtSignature) + if err == nil { + t.Fatalf("entry[%d] should not pass as Grok encrypted_content", i) + } + if !strings.Contains(entry.ThoughtSignature, "=") { + checkedUnpaddedGemini = true + if !strings.Contains(err.Error(), "Gemini") { + t.Fatalf("entry[%d] error = %q, want Gemini fast-reject detail", i, err.Error()) + } + } + } + if !checkedUnpaddedGemini { + t.Fatal("expected at least one unpadded Gemini thought signature sample") + } +} + +func TestInspectGrokEncryptedContent_RejectsGeminiThoughtSignatureEnvelope(t *testing.T) { + sample := testGeminiThoughtSignatureEnvelope() + + _, err := InspectGrokEncryptedContent(sample) + if err == nil { + t.Fatal("expected Gemini thoughtSignature envelope to be rejected") + } + if !strings.Contains(err.Error(), "Gemini") { + 
t.Fatalf("error = %q, want Gemini fast-reject detail", err.Error()) + } +} + +func TestInspectGrokEncryptedContent_RejectsGemini25Field1Envelope(t *testing.T) { + sample := testGemini25Field1ThoughtSignatureEnvelope() + if !IsValidGeminiThoughtSignature(sample, GeminiThoughtSignatureValidationOptions{RequireKnownEnvelope: true}) { + t.Fatal("fixture should be a known Gemini field-1 thoughtSignature") + } + + _, err := InspectGrokEncryptedContent(sample) + if err == nil { + t.Fatal("expected Gemini field-1 thoughtSignature envelope to be rejected") + } + if !strings.Contains(err.Error(), "Gemini") { + t.Fatalf("error = %q, want Gemini fast-reject detail", err.Error()) + } +} + +func TestInspectGrokEncryptedContent_RejectsClaudeThinkingSignature(t *testing.T) { + sample := testUnpaddedClaudeThinkingSignature() + if !IsValidClaudeThinkingSignature(sample, ClaudeSignatureValidationOptions{Strict: true}) { + t.Fatal("fixture should be a strict Claude thinking signature") + } + + _, err := InspectGrokEncryptedContent(sample) + if err == nil { + t.Fatal("expected Claude thinking signature to be rejected") + } + if !strings.Contains(err.Error(), "Claude") { + t.Fatalf("error = %q, want Claude fast-reject detail", err.Error()) + } +} + +func TestInspectGrokEncryptedContent_RejectsAntigravityClaudeThinkingSignature(t *testing.T) { + sample := testUnpaddedAntigravityClaudeThinkingSignature() + if !strings.HasPrefix(sample, "R") || strings.Contains(sample, "=") { + t.Fatalf("fixture should be an unpadded R-form Claude signature, got prefix=%q has_padding=%t", sample[:1], strings.Contains(sample, "=")) + } + if !IsValidClaudeThinkingSignature(sample, ClaudeSignatureValidationOptions{Strict: true}) { + t.Fatal("fixture should be a strict Antigravity Claude thinking signature") + } + + _, err := InspectGrokEncryptedContent(sample) + if err == nil { + t.Fatal("expected Antigravity Claude thinking signature to be rejected") + } + if !strings.Contains(err.Error(), "Claude") { + 
t.Fatalf("error = %q, want Claude fast-reject detail", err.Error()) + } +} + +func TestInspectGrokEncryptedContent_RejectsForeignShapes(t *testing.T) { + cases := []string{ + "", + "bad", + " opaque", + "gAAAAABinvalid-gpt-shape", + "abcd_efg", + base64.StdEncoding.EncodeToString(bytes.Repeat([]byte{0xa5}, MinGrokEncryptedContentDecodedLen)), + } + for _, sample := range cases { + if _, err := InspectGrokEncryptedContent(sample); err == nil { + t.Fatalf("expected invalid Grok encrypted_content, got pass for %q", sample) + } + } +} + +func TestInspectGrokEncryptedContent_RejectsLowEntropyPayload(t *testing.T) { + sample := base64.RawStdEncoding.EncodeToString(bytes.Repeat([]byte{0xa5}, MinGrokEncryptedContentDecodedLen)) + + _, err := InspectGrokEncryptedContent(sample) + if err == nil { + t.Fatal("expected low-entropy payload to be rejected") + } + if !strings.Contains(err.Error(), "entropy ratio") { + t.Fatalf("error = %q, want entropy ratio detail", err.Error()) + } +} + +func TestInspectGrokEncryptedContent_RejectsInvalidBase64Length(t *testing.T) { + _, err := InspectGrokEncryptedContent("AAAAA") + if err == nil { + t.Fatal("expected invalid base64 length to be rejected") + } + if !strings.Contains(err.Error(), "base64 decode failed") { + t.Fatalf("error = %q, want base64 decode detail", err.Error()) + } +} + +func TestByteEntropyRatio_SingleByteReturnsZero(t *testing.T) { + if got := byteEntropyRatio([]byte{0xa5}); got != 0 { + t.Fatalf("byteEntropyRatio(single byte) = %v, want 0", got) + } +} + +func testGeminiThoughtSignatureEnvelope() string { + payload := []byte{0x01, 0x0c} + for i := 0; i < 97; i++ { + payload = append(payload, byte(i)) + } + inner := []byte{0x0a, byte(len(payload))} + inner = append(inner, payload...) + outer := []byte{0x12, byte(len(inner))} + outer = append(outer, inner...) 
+ return base64.RawStdEncoding.EncodeToString(outer) +} + +func testGemini25Field1ThoughtSignatureEnvelope() string { + payload := []byte{0x01} + for i := 0; len(payload) < 128; i++ { + payload = append(payload, byte((i*37+11)%251)) + } + + var decoded []byte + decoded = protowire.AppendTag(decoded, 1, protowire.BytesType) + decoded = protowire.AppendBytes(decoded, payload) + return base64.RawStdEncoding.EncodeToString(decoded) +} + +func testUnpaddedClaudeThinkingSignature() string { + return testClaudeThinkingSignatureWithOpaqueLen(35) +} + +func testUnpaddedAntigravityClaudeThinkingSignature() string { + return base64.StdEncoding.EncodeToString([]byte(testClaudeThinkingSignatureWithOpaqueLen(41))) +} + +func testClaudeThinkingSignatureWithOpaqueLen(opaqueLen int) string { + var channelBlock []byte + channelBlock = protowire.AppendTag(channelBlock, 1, protowire.VarintType) + channelBlock = protowire.AppendVarint(channelBlock, 12) + channelBlock = protowire.AppendTag(channelBlock, 2, protowire.VarintType) + channelBlock = protowire.AppendVarint(channelBlock, 2) + channelBlock = protowire.AppendTag(channelBlock, 6, protowire.BytesType) + channelBlock = protowire.AppendString(channelBlock, "claude-sonnet-4-6") + + var container []byte + container = protowire.AppendTag(container, 1, protowire.BytesType) + container = protowire.AppendBytes(container, channelBlock) + + var payload []byte + payload = protowire.AppendTag(payload, 2, protowire.BytesType) + payload = protowire.AppendBytes(payload, container) + payload = protowire.AppendTag(payload, 3, protowire.VarintType) + payload = protowire.AppendVarint(payload, 1) + payload = protowire.AppendTag(payload, 4, protowire.BytesType) + opaque := make([]byte, 0, opaqueLen) + for i := 0; len(opaque) < opaqueLen; i++ { + opaque = append(opaque, byte((i*41+17)%251)) + } + payload = protowire.AppendBytes(payload, opaque) + return base64.StdEncoding.EncodeToString(payload) +} + +func grokEncryptedContentSamplesPath() (string, 
bool) { + _, file, _, ok := runtime.Caller(0) + if !ok { + return "", false + } + repo := filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..")) + path := filepath.Join(repo, "docs", "native-prompt-capture", "corpus", "grok-encrypted-content", "samples.json") + if _, err := os.Stat(path); err != nil { + return path, false + } + return path, true +}