feat(assistant): upgrade default models to gpt-5.4-nano and gpt-5.3-codex (#44107)

Replaces `gpt-5-mini` and `gpt-5` with `gpt-5.4-nano` and
`gpt-5.3-codex` respectively. Clients with stale model IDs in IndexedDB
will gracefully reset to the new defaults. While we can technically keep
the existing models around, we've
[opted](https://supabase.slack.com/archives/C051L8U2EJF/p1774283070517609?thread_ts=1773771991.871669&cid=C051L8U2EJF)
to replace them with the newer models for simplicity. Basic completion
endpoints use the `'none'` reasoning effort level for optimal speed.

The rationale for these models is that they provide the best balance of
intelligence/speed and cost. GPT-5.4-nano is less expensive (0.8x the
price), faster, and smarter than GPT-5-mini. GPT-5.4-mini would be even
smarter but is 3x the price. GPT-5.3-Codex is ~1.4x the price of GPT-5,
while GPT-5.4 would be 2x the price, but 5.3-Codex is still a big
intelligence boost over GPT-5.

See the [eval
comparison](https://www.braintrust.dev/app/supabase.io/p/Assistant/experiments/mattrossman%2Fai-509-v2-upgrade-assistant-models-beyond-gpt-5-family-1774468619?c=master-1774458837&diff=between_experiments):
scores are relatively stable, and conciseness naturally improves on
gpt-5.4-nano.

Other change:
- Fixed an eval test case to clarify that https://supabase.help is also
a correct URL for submitting a support ticket, which was unfairly scored
as incorrect
[here](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=experiment&object_id=5244cccd-23b2-4f79-9dd2-287f1b40ebad&r=bac9b903-8bde-4c21-99dd-e0ed141c4f9e&s=f248fbf5-75bf-4aab-be0a-87a4298e6d11)

I sanity checked the Assistant, natural language filters, and SQL Editor
completions on staging preview.

References:
- https://openai.com/index/introducing-gpt-5-4-mini-and-nano/
- https://openai.com/index/introducing-gpt-5-3-codex/
- https://developers.openai.com/api/docs/pricing

Closes AI-509
This commit is contained in:
Matt Rossman
2026-03-26 02:35:54 -04:00
committed by GitHub
parent ba5eedcefa
commit 0c5f64fcba
5 changed files with 55 additions and 47 deletions

View File

@@ -109,7 +109,8 @@ export const dataset: AssistantEvalCase[] = [
{
input: { prompt: 'Where can I go to create a support ticket?' },
expected: {
correctAnswer: 'https://supabase.com/dashboard/support/new',
correctAnswer:
'https://supabase.com/dashboard/support/new (or https://supabase.help which redirects there)',
},
metadata: {
category: ['general_help'],

View File

@@ -51,11 +51,11 @@ describe('getModel', () => {
const { modelParams, promptProviderOptions } = await getModel({
provider: 'openai',
modelEntry: openaiModelEntry({ id: 'gpt-5-mini' }),
modelEntry: openaiModelEntry({ id: 'gpt-5.4-nano' }),
})
expect(modelParams?.model).toEqual('openai-model')
expect(openai).toHaveBeenCalledWith('gpt-5-mini')
expect(openai).toHaveBeenCalledWith('gpt-5.4-nano')
expect(promptProviderOptions).toBeUndefined()
})
@@ -64,24 +64,24 @@ describe('getModel', () => {
const { error } = await getModel({
provider: 'openai',
modelEntry: openaiModelEntry({ id: 'gpt-5-mini' }),
modelEntry: openaiModelEntry({ id: 'gpt-5.4-nano' }),
})
expect(error).toEqual(new Error('OPENAI_API_KEY not available'))
})
it('returns openai gpt-5 when hasAccessToAdvanceModel and not throttled', async () => {
it('returns openai gpt-5.3-codex when hasAccessToAdvanceModel and not throttled', async () => {
vi.stubEnv('OPENAI_API_KEY', 'test-key')
vi.stubEnv('IS_THROTTLED', 'false')
const { modelParams, error } = await getModel({
provider: 'openai',
modelEntry: openaiModelEntry({ id: 'gpt-5', reasoningEffort: 'minimal' }),
modelEntry: openaiModelEntry({ id: 'gpt-5.3-codex', reasoningEffort: 'low' }),
})
expect(error).toBeUndefined()
expect(modelParams?.model).toEqual('openai-model')
expect(openai).toHaveBeenCalledWith('gpt-5')
expect(modelParams?.providerOptions?.openai?.reasoningEffort).toBe('minimal')
expect(openai).toHaveBeenCalledWith('gpt-5.3-codex')
expect(modelParams?.providerOptions?.openai?.reasoningEffort).toBe('low')
})
it('applies reasoningEffort from DEFAULT_COMPLETION_MODEL', async () => {
@@ -93,7 +93,7 @@ describe('getModel', () => {
})
expect(error).toBeUndefined()
expect(openai).toHaveBeenCalledWith('gpt-5-mini')
expect(modelParams?.providerOptions?.openai?.reasoningEffort).toBe('minimal')
expect(openai).toHaveBeenCalledWith('gpt-5.4-nano')
expect(modelParams?.providerOptions?.openai?.reasoningEffort).toBe('none')
})
})

View File

@@ -25,7 +25,7 @@ describe('model.utils', () => {
it('should return correct default for openai provider', () => {
const result = getDefaultModelForProvider('openai')
expect(result).toBe('gpt-5-mini')
expect(result).toBe('gpt-5.4-nano')
})
it('should return undefined for unknown provider', () => {
@@ -47,8 +47,8 @@ describe('model.utils', () => {
it('should have openai provider with models', () => {
expect(PROVIDERS.openai).toBeDefined()
expect(PROVIDERS.openai.models).toBeDefined()
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5')
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5-mini')
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5.3-codex')
expect(Object.keys(PROVIDERS.openai.models)).toContain('gpt-5.4-nano')
})
it('should have exactly one default model per provider', () => {
@@ -111,48 +111,50 @@ describe('model.utils', () => {
})
it('defaults should satisfy unions', () => {
expect(DEFAULT_ASSISTANT_BASE_MODEL_ID).toBe('gpt-5-mini')
expect(DEFAULT_ASSISTANT_ADVANCE_MODEL_ID).toBe('gpt-5')
expect(DEFAULT_ASSISTANT_BASE_MODEL_ID).toBe('gpt-5.4-nano')
expect(DEFAULT_ASSISTANT_ADVANCE_MODEL_ID).toBe('gpt-5.3-codex')
expect(defaultAssistantModelId(false)).toBe(DEFAULT_ASSISTANT_BASE_MODEL_ID)
expect(defaultAssistantModelId(true)).toBe(DEFAULT_ASSISTANT_ADVANCE_MODEL_ID)
})
it('isAssistantBaseModelId / isAdvanceOnlyModelId', () => {
expect(isAssistantBaseModelId('gpt-5-mini')).toBe(true)
expect(isAssistantBaseModelId('gpt-5')).toBe(false)
expect(isAdvanceOnlyModelId('gpt-5')).toBe(true)
expect(isAdvanceOnlyModelId('gpt-5-mini')).toBe(false)
expect(isAssistantBaseModelId('gpt-5.4-nano')).toBe(true)
expect(isAssistantBaseModelId('gpt-5.3-codex')).toBe(false)
expect(isAdvanceOnlyModelId('gpt-5.3-codex')).toBe(true)
expect(isAdvanceOnlyModelId('gpt-5.4-nano')).toBe(false)
})
it('isKnownAssistantModelId', () => {
expect(isKnownAssistantModelId('gpt-5-mini')).toBe(true)
expect(isKnownAssistantModelId('gpt-5')).toBe(true)
expect(isKnownAssistantModelId('gpt-5.4-nano')).toBe(true)
expect(isKnownAssistantModelId('gpt-5.3-codex')).toBe(true)
expect(isKnownAssistantModelId('gpt-5')).toBe(false)
expect(isKnownAssistantModelId('gpt-5-mini')).toBe(false)
expect(isKnownAssistantModelId('unknown')).toBe(false)
})
it('getAssistantModelEntry returns config for known ids', () => {
expect(getAssistantModelEntry('gpt-5-mini').reasoningEffort).toBe('minimal')
expect(getAssistantModelEntry('gpt-5').reasoningEffort).toBe('minimal')
expect(getAssistantModelEntry('gpt-5-mini')).toEqual(
ASSISTANT_MODELS.find((m) => m.id === 'gpt-5-mini')
expect(getAssistantModelEntry('gpt-5.4-nano').reasoningEffort).toBe('low')
expect(getAssistantModelEntry('gpt-5.3-codex').reasoningEffort).toBe('low')
expect(getAssistantModelEntry('gpt-5.4-nano')).toEqual(
ASSISTANT_MODELS.find((m) => m.id === 'gpt-5.4-nano')
)
})
it('DEFAULT_COMPLETION_MODEL is gpt-5-mini with minimal reasoning effort', () => {
it('DEFAULT_COMPLETION_MODEL is gpt-5.4-nano with no reasoning effort', () => {
expect(DEFAULT_COMPLETION_MODEL.id).toBe(DEFAULT_ASSISTANT_BASE_MODEL_ID)
expect(DEFAULT_COMPLETION_MODEL.reasoningEffort).toBe('minimal')
expect(DEFAULT_COMPLETION_MODEL.reasoningEffort).toBe('none')
})
it('openaiModelEntry enforces valid reasoning effort at compile time', () => {
// Valid: supported effort level
const withEffort = openaiModelEntry({
id: 'gpt-5-mini',
id: 'gpt-5.4-nano',
reasoningEffort: 'low',
})
expect(withEffort.reasoningEffort).toBe('low')
// Valid: no effort
const withoutEffort = openaiModelEntry({ id: 'gpt-5-mini' })
const withoutEffort = openaiModelEntry({ id: 'gpt-5.4-nano' })
expect(withoutEffort.reasoningEffort).toBeUndefined()
})
})

View File

@@ -2,17 +2,17 @@ export type ProviderName = 'bedrock' | 'openai'
export type BedrockModel = 'anthropic.claude-3-7-sonnet-20250219-v1:0' | 'openai.gpt-oss-120b-1:0'
export type OpenAIModelId = 'gpt-5' | 'gpt-5-mini'
export type OpenAIModelId = 'gpt-5.4-nano' | 'gpt-5.3-codex'
// Source: https://developers.openai.com/api/docs/guides/reasoning + per-model pages
export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'
// Per-model reasoning effort compatibility.
// When adding a model, verify supported levels in the community matrix and add an entry:
// https://community.openai.com/t/request-for-compatibility-matrix-reasoning-effort-sampling-parameters-across-gpt-5-series/1371738/2
// Sources: https://developers.openai.com/api/docs/models/gpt-5.4-nano
// https://developers.openai.com/api/docs/models/gpt-5.3-codex
type ModelReasoningSupport = {
'gpt-5': 'minimal' | 'low' | 'medium' | 'high'
'gpt-5-mini': 'minimal' | 'low' | 'medium' | 'high'
'gpt-5.4-nano': 'none' | 'low' | 'medium' | 'high' | 'xhigh'
'gpt-5.3-codex': 'low' | 'medium' | 'high' | 'xhigh'
}
type ReasoningEffortFor<ModelId extends OpenAIModelId> = ModelId extends keyof ModelReasoningSupport
@@ -47,22 +47,22 @@ export type OpenAIModelEntry = ReturnType<typeof openaiModelEntry>
/** Default model entry for simple completion endpoints where latency is more important than reasoning. */
export const DEFAULT_COMPLETION_MODEL = openaiModelEntry({
id: 'gpt-5-mini',
reasoningEffort: 'minimal',
id: 'gpt-5.4-nano',
reasoningEffort: 'none',
})
// Single source of truth for all Assistant chat model variants and their reasoning levels.
// Models with requiresAdvanceModelEntitlement false are available to all users; true requires the assistant.advance_model entitlement.
export const ASSISTANT_MODELS = [
openaiModelEntry({
id: 'gpt-5-mini',
id: 'gpt-5.4-nano',
requiresAdvanceModelEntitlement: false,
reasoningEffort: 'minimal',
reasoningEffort: 'low',
}),
openaiModelEntry({
id: 'gpt-5',
id: 'gpt-5.3-codex',
requiresAdvanceModelEntitlement: true,
reasoningEffort: 'minimal',
reasoningEffort: 'low',
}),
] as const
@@ -77,9 +77,9 @@ const ASSISTANT_MODELS_MAP = Object.fromEntries(ASSISTANT_MODELS.map((m) => [m.i
(typeof ASSISTANT_MODELS)[number]
>
export const DEFAULT_ASSISTANT_BASE_MODEL_ID = 'gpt-5-mini' satisfies AssistantBaseModelId
export const DEFAULT_ASSISTANT_BASE_MODEL_ID = 'gpt-5.4-nano' satisfies AssistantBaseModelId
export const DEFAULT_ASSISTANT_ADVANCE_MODEL_ID = 'gpt-5' satisfies AssistantModelId
export const DEFAULT_ASSISTANT_ADVANCE_MODEL_ID = 'gpt-5.3-codex' satisfies AssistantModelId
export function defaultAssistantModelId(hasAccessToAdvanceModel: boolean): AssistantModelId {
return hasAccessToAdvanceModel
@@ -148,8 +148,8 @@ export const PROVIDERS: ProviderRegistry = {
},
openai: {
models: {
'gpt-5': { default: false },
'gpt-5-mini': { default: true },
'gpt-5.3-codex': { default: false },
'gpt-5.4-nano': { default: true },
},
providerOptions: {
openai: {

View File

@@ -8,6 +8,7 @@ import { proxy, ref, snapshot, subscribe, useSnapshot } from 'valtio'
import { constructHeaders } from 'data/fetchers'
import { prepareMessagesForAPI } from 'lib/ai/message-utils'
import { isKnownAssistantModelId } from 'lib/ai/model.utils'
import type { AssistantModelId } from 'lib/ai/model.utils'
import { BASE_PATH, IS_PLATFORM } from 'lib/constants'
@@ -46,7 +47,7 @@ type AiAssistantData = {
tables: { schema: string; name: string }[]
chats: Record<string, ChatSession>
activeChatId?: string
model: AssistantModel
model?: AssistantModel
context: AiAssistantContext
}
@@ -65,7 +66,7 @@ const INITIAL_AI_ASSISTANT: AiAssistantData = {
tables: [],
chats: {},
activeChatId: undefined,
model: 'gpt-5',
model: undefined,
context: {},
}
@@ -487,7 +488,11 @@ export const createAiAssistantState = (): AiAssistantState => {
loadPersistedState: (persistedState: StoredAiAssistantState) => {
state.chats = persistedState.chats
state.activeChatId = persistedState.activeChatId
state.model = persistedState.model ?? INITIAL_AI_ASSISTANT.model
const storedModel = persistedState.model
state.model =
storedModel && isKnownAssistantModelId(storedModel)
? storedModel
: INITIAL_AI_ASSISTANT.model
// Ensure an active chat exists after loading
if (!state.activeChat) {