diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 399368125..73187963c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -465,7 +465,17 @@ func filterCodexReasoningReplayItemsForInput(body []byte, items [][]byte) [][]by hasInputReasoning := codexInputHasValidReasoningEncryptedContent(body) existingCalls := make(map[string]bool) + existingOutputs := make(map[string]bool) for _, inputItem := range input.Array() { + itemType := strings.TrimSpace(inputItem.Get("type").String()) + if itemType == "function_call_output" || itemType == "custom_tool_call_output" { + callID := strings.TrimSpace(inputItem.Get("call_id").String()) + if callID != "" { + for _, candidate := range codexReplayComparableCallIDs(callID) { + existingOutputs[candidate] = true + } + } + } for _, key := range codexReplayToolCallKeys(inputItem) { existingCalls[key] = true } @@ -484,6 +494,20 @@ func filterCodexReasoningReplayItemsForInput(body []byte, items [][]byte) [][]by if len(keys) == 0 || codexReplayAnyToolCallKeyExists(existingCalls, keys) { continue } + // Only inject if there is a matching output in the request + hasMatchingOutput := false + callID := strings.TrimSpace(itemResult.Get("call_id").String()) + if callID != "" { + for _, candidate := range codexReplayComparableCallIDs(callID) { + if existingOutputs[candidate] { + hasMatchingOutput = true + break + } + } + } + if !hasMatchingOutput { + continue + } for _, key := range keys { existingCalls[key] = true } diff --git a/internal/runtime/executor/codex_executor_reasoning_replay_cache_test.go b/internal/runtime/executor/codex_executor_reasoning_replay_cache_test.go index a15007ed3..8c94b146b 100644 --- a/internal/runtime/executor/codex_executor_reasoning_replay_cache_test.go +++ b/internal/runtime/executor/codex_executor_reasoning_replay_cache_test.go @@ -710,6 +710,54 @@ func TestCodexExecutorReasoningReplayCacheReplaysFunctionCallForClaudeToolResult } } +func TestCodexExecutorReasoningReplayCacheDropsFunctionCallWithoutMatchingOutput(t *testing.T) { + internalcache.ClearCodexReasoningReplayCache() + t.Cleanup(internalcache.ClearCodexReasoningReplayCache) + + encryptedContent := validCodexReasoningEncryptedContentForTestSeed(14) + scope := codexReasoningReplayScope{ + modelName: "gpt-5.4", + sessionKey: "claude:session-dropped-tool", + } + cacheCodexReasoningReplayFromCompleted(scope, []byte(`{"response":{"output":[`+ + `{"type":"reasoning","summary":[],"content":null,"encrypted_content":"`+encryptedContent+`"},`+ + `{"type":"function_call","call_id":"call_dropped","name":"TaskCreate","arguments":"{}"}`+ + `]}}`)) + + body := []byte(`{"model":"gpt-5.4","input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"next"}]}]}`) + req := cliproxyexecutor.Request{ + Model: "gpt-5.4", + Payload: []byte(`{ + "model":"gpt-5.4", + "metadata":{"user_id":"{\"device_id\":\"device-test\",\"account_uuid\":\"\",\"session_id\":\"session-dropped-tool\"}"}, + "messages":[{"role":"user","content":[{"type":"text","text":"next"}]}] + }`), + } + + updated, replayScope := applyCodexReasoningReplayCache( + context.Background(), + sdktranslator.FromString("claude"), + req, + cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}, + body, + ) + if replayScope != scope { + t.Fatalf("replay scope = %#v, want %#v", replayScope, scope) + } + if got := gjson.GetBytes(updated, "input.0.type").String(); got != "reasoning" { + t.Fatalf("input.0.type = %q, want reasoning; body=%s", got, string(updated)) + } + if got := gjson.GetBytes(updated, "input.0.encrypted_content").String(); got != encryptedContent { + t.Fatalf("input.0.encrypted_content = %q, want cached reasoning; body=%s", got, string(updated)) + } + if gjson.GetBytes(updated, `input.#(call_id=="call_dropped")`).Exists() { + t.Fatalf("cached function_call without matching output should not be replayed; body=%s", string(updated)) + } + if got := gjson.GetBytes(updated, "input.1.role").String(); got != "user" { + t.Fatalf("input.1.role = %q, want user; body=%s", got, string(updated)) + } +} + func TestCodexExecutorReasoningReplayCacheMatchesShortenedClaudeToolResultCallID(t *testing.T) { internalcache.ClearCodexReasoningReplayCache() t.Cleanup(internalcache.ClearCodexReasoningReplayCache)