From 4056c2590be9785fa93c64b78b199112eef99cc0 Mon Sep 17 00:00:00 2001 From: Matthias319 Date: Fri, 24 Apr 2026 17:13:23 +0200 Subject: [PATCH] fix(codex): classify known upstream failures Normalize Codex context, thinking-signature, previous-response, and auth failures to explicit error codes: context_too_large, thinking_signature_invalid, previous_response_not_found, auth_unavailable. Refs #2596. --- internal/runtime/executor/codex_executor.go | 47 ++++++++++ .../executor/codex_executor_retry_test.go | 89 +++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 38667231a..48b3755ed 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -809,6 +809,7 @@ func newCodexStatusErr(statusCode int, body []byte) statusErr { if isCodexModelCapacityError(body) { errCode = http.StatusTooManyRequests } + body = classifyCodexStatusError(errCode, body) err := statusErr{code: errCode, msg: string(body)} if retryAfter := parseCodexRetryAfter(errCode, body, time.Now()); retryAfter != nil { err.retryAfter = retryAfter @@ -816,6 +817,52 @@ func newCodexStatusErr(statusCode int, body []byte) statusErr { return err } +func classifyCodexStatusError(statusCode int, body []byte) []byte { + code, errType, ok := codexStatusErrorClassification(statusCode, body) + if !ok { + return body + } + message := gjson.GetBytes(body, "error.message").String() + if message == "" { + message = gjson.GetBytes(body, "message").String() + } + if message == "" { + message = strings.TrimSpace(string(body)) + } + if message == "" { + message = http.StatusText(statusCode) + } + out := []byte(`{"error":{}}`) + out, _ = sjson.SetBytes(out, "error.message", message) + out, _ = sjson.SetBytes(out, "error.type", errType) + out, _ = sjson.SetBytes(out, "error.code", code) + return out +} + +func codexStatusErrorClassification(statusCode int, body []byte) (code string, errType string, ok bool) { + errorMessage := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "error.message").String())) + if errorMessage == "" { + errorMessage = strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "message").String())) + } + lower := strings.ToLower(strings.TrimSpace(string(body))) + upstreamCode := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "error.code").String())) + upstreamType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "error.type").String())) + isInvalidRequest := upstreamType == "" || upstreamType == "invalid_request_error" + + switch { + case statusCode == http.StatusRequestEntityTooLarge || upstreamCode == "context_length_exceeded" || upstreamCode == "context_too_large" || isInvalidRequest && (strings.Contains(errorMessage, "context length") || strings.Contains(errorMessage, "context_length") || strings.Contains(errorMessage, "maximum context") || strings.Contains(errorMessage, "too many tokens")): + return "context_too_large", "invalid_request_error", true + case strings.Contains(lower, "invalid signature in thinking block") || strings.Contains(lower, "invalid_encrypted_content"): + return "thinking_signature_invalid", "invalid_request_error", true + case upstreamCode == "previous_response_not_found" || strings.Contains(lower, "previous_response_not_found") || strings.Contains(lower, "previous_response_id") && strings.Contains(lower, "not found"): + return "previous_response_not_found", "invalid_request_error", true + case statusCode == http.StatusUnauthorized || upstreamType == "authentication_error" || upstreamCode == "invalid_api_key" || strings.Contains(lower, "invalid or expired token") || strings.Contains(lower, "refresh_token_reused"): + return "auth_unavailable", "authentication_error", true + default: + return "", "", false + } +} + func normalizeCodexInstructions(body []byte) []byte { instructions := gjson.GetBytes(body, "instructions") if !instructions.Exists() || instructions.Type == gjson.Null { diff --git a/internal/runtime/executor/codex_executor_retry_test.go b/internal/runtime/executor/codex_executor_retry_test.go index 249d40d65..7207d5734 100644 --- a/internal/runtime/executor/codex_executor_retry_test.go +++ b/internal/runtime/executor/codex_executor_retry_test.go @@ -1,6 +1,7 @@ package executor import ( + "encoding/json" "net/http" "strconv" "testing" @@ -73,6 +74,94 @@ func TestNewCodexStatusErrTreatsCapacityAsRetryableRateLimit(t *testing.T) { } } +func TestNewCodexStatusErrClassifiesKnownCodexFailures(t *testing.T) { + tests := []struct { + name string + statusCode int + body []byte + wantStatus int + wantType string + wantCode string + }{ + { + name: "context length status", + statusCode: http.StatusRequestEntityTooLarge, + body: []byte(`{"error":{"message":"context length exceeded","type":"invalid_request_error","code":"context_length_exceeded"}}`), + wantStatus: http.StatusRequestEntityTooLarge, + wantType: "invalid_request_error", + wantCode: "context_too_large", + }, + { + name: "thinking signature", + statusCode: http.StatusBadRequest, + body: []byte(`{"error":{"message":"Invalid signature in thinking block","type":"invalid_request_error","code":"invalid_request_error"}}`), + wantStatus: http.StatusBadRequest, + wantType: "invalid_request_error", + wantCode: "thinking_signature_invalid", + }, + { + name: "previous response missing", + statusCode: http.StatusBadRequest, + body: []byte(`{"error":{"message":"No response found for previous_response_id resp_123","type":"invalid_request_error","code":"previous_response_not_found"}}`), + wantStatus: http.StatusBadRequest, + wantType: "invalid_request_error", + wantCode: "previous_response_not_found", + }, + { + name: "auth unavailable", + statusCode: http.StatusUnauthorized, + body: []byte(`{"error":{"message":"invalid or expired token","type":"authentication_error","code":"invalid_api_key"}}`), + wantStatus: http.StatusUnauthorized, + wantType: "authentication_error", + wantCode: "auth_unavailable", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + err := newCodexStatusErr(tc.statusCode, tc.body) + + if got := err.StatusCode(); got != tc.wantStatus { + t.Fatalf("status code = %d, want %d", got, tc.wantStatus) + } + assertCodexErrorCode(t, err.Error(), tc.wantType, tc.wantCode) + }) + } +} + +func TestNewCodexStatusErrPreservesUnclassifiedErrors(t *testing.T) { + body := []byte(`{"error":{"message":"documentation mentions too many tokens, but this is a billing configuration failure","type":"server_error","code":"billing_config_error"}}`) + + err := newCodexStatusErr(http.StatusBadGateway, body) + + if got := err.StatusCode(); got != http.StatusBadGateway { + t.Fatalf("status code = %d, want %d", got, http.StatusBadGateway) + } + if got := err.Error(); got != string(body) { + t.Fatalf("error body = %s, want original %s", got, string(body)) + } +} + +func assertCodexErrorCode(t *testing.T, raw string, wantType string, wantCode string) { + t.Helper() + + var payload struct { + Error struct { + Type string `json:"type"` + Code string `json:"code"` + } `json:"error"` + } + if err := json.Unmarshal([]byte(raw), &payload); err != nil { + t.Fatalf("error body is not valid JSON: %v; body=%s", err, raw) + } + if payload.Error.Type != wantType { + t.Fatalf("error.type = %q, want %q; body=%s", payload.Error.Type, wantType, raw) + } + if payload.Error.Code != wantCode { + t.Fatalf("error.code = %q, want %q; body=%s", payload.Error.Code, wantCode, raw) + } +} + func itoa(v int64) string { return strconv.FormatInt(v, 10) }