Files
supabase/apps/studio/pages/api/ai/sql/generate-v4.ts
Matt Rossman 072883bcec feat: assistant evals (#41311)
* chore: bump `supabase` CLI

* chore: stricter message types in `generate-v4.ts`

* feat: tutorial eval

https://www.braintrust.dev/docs/evaluation

* feat: project ID for eval

* refactor: `generateAssistantResponse` out of `handlePost`

* refactor: generateAssistantResponse to lib/ai

* feat: factuality eval with assistant response

* chore: upgrade braintrust to v1.0.1

* chore: silence tsconfig warning

* feat: assertion scorer

* fix: aggregate tools across all steps

* refactor: strict tool names, remove need for `as const`

* refactor: generic tool name type in assertions

* feat: transfer mocks from `feature/braintrust`

* feat: LLM criteria assertion

* feat: braintrust evals workflow

* fix: BRAINTRUST_PROJECT_ID

* feat: `sql_similar` assertion

* fix: `OPENAI_API_KEY` in workflow env

* feat: split AssertionScorer into separate scorers

* feat: remove tutorial eval

* feat: 20 minute CI timeout

* feat: category in test case metadata

* feat: score with gpt-5

* refactor: dataset to own file, colocate scorers

* feat: "gpt-5.2-2025-12-11" for llm as a judge

* feat: SQL syntax scorer with `libpg-query`

* feat: `evals:setup` and `evals:run` scripts

* feat: `evals:setup` in CI

* feat: human readable scorer names

* chore: rename to "SQL Validity"

* feat: add 2 "sql_generation" test cases

* feat: update requiredTools in test cases

* chore: ignore Cursor MCP config

* feat: "Conciseness" score

* feat: "Completeness" scorer

* fix: generate-v4 test mocks

* feat: serialize "steps" for scorer inputs

* updated node mem options for typecheck

* updated runner

* remove ram update as actions handle this

* feat: read `BRAINTRUST_PROJECT_ID` from secrets

* feat: score helpfulness, remove old scorers

* feat: separate `evals:run` and `evals:upload` scripts

* feat: passthrough entire classifier result

* feat: use live `search_docs` impl, store docs result in metadata

* feat: reduce classifier options

* feat: filter workflow by `run-evals` PR label or `master` branch

* chore: cleanup stubbed mock tools

* fix: checkout actual branch with `ref:`

* fix: capture search_docs results from all content parts

* feat: simplify sql syntax score calculation

* feat: use AI SDK's UI message validator

* docs: justification for relative `extends`

* fix: cleanup leftover validatedMessages

* doc: note mock token isn't secret for snyk

* fix: mock ui message to pass validation

* feat: revert ignoring Cursor MCP config

Using `.git/info/exclude` instead until we have an opinion on this

* feat: add "tsconfig" as shared-data devDependency, revert relative path in tsconfig

* refactor: tool call parsing into function

* Update apps/studio/evals/assistant.eval.ts

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* refactor: organize mock schemas and tool factories

---------

Co-authored-by: Ali Waseem <waseema393@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2025-12-22 23:45:48 -05:00

206 lines
5.4 KiB
TypeScript

import pgMeta from '@supabase/pg-meta'
import { safeValidateUIMessages } from 'ai'
import type { NextApiRequest, NextApiResponse } from 'next'
import z from 'zod'
import { IS_PLATFORM } from 'common'
import { executeSql } from 'data/sql/execute-sql-query'
import type { AiOptInLevel } from 'hooks/misc/useOrgOptedIntoAi'
import { getModel } from 'lib/ai/model'
import { getOrgAIDetails } from 'lib/ai/org-ai-details'
import { generateAssistantResponse } from 'lib/ai/generate-assistant-response'
import { getTools } from 'lib/ai/tools'
import apiWrapper from 'lib/api/apiWrapper'
import { executeQuery } from 'lib/api/self-hosted/query'
// Allow long-running AI generations: extends the serverless function timeout (seconds).
export const maxDuration = 120
// Raise the body-size limit: chat histories with large context can exceed
// Next.js' 1mb default for API routes.
export const config = {
api: {
bodyParser: {
sizeLimit: '5mb',
},
},
}
/**
 * Routes incoming requests for this endpoint.
 *
 * POST is delegated to `handlePost`; every other method is rejected with a
 * 405 response carrying an `Allow` header that advertises POST.
 */
async function handler(req: NextApiRequest, res: NextApiResponse) {
  if (req.method === 'POST') {
    return handlePost(req, res)
  }

  res.setHeader('Allow', ['POST'])
  res.status(405).json({
    data: null,
    error: { message: `Method ${req.method} Not Allowed` },
  })
}
/**
 * API entry point — wraps `handler` with `apiWrapper` so authentication is
 * enforced before any request reaches the route logic.
 */
export default function wrapper(req: NextApiRequest, res: NextApiResponse) {
  return apiWrapper(req, res, handler, { withAuth: true })
}
// Shape of the POST body. `messages` entries are deliberately opaque at this
// layer — they are structurally validated afterwards by the AI SDK's
// `safeValidateUIMessages` — so we use `z.unknown()` (type-safe: forces
// narrowing before use) instead of `z.any()` (which disables type checking
// on everything derived from the parsed value).
const requestBodySchema = z.object({
  messages: z.array(z.unknown()),
  projectRef: z.string(),
  connectionString: z.string(),
  schema: z.string().optional(),
  table: z.string().optional(),
  chatName: z.string().optional(),
  orgSlug: z.string().optional(),
  // Only these two models are permitted; anything else fails validation.
  model: z.enum(['gpt-5', 'gpt-5-mini']).optional(),
})
/**
 * Handles POST requests: validates the body and chat messages, resolves the
 * organization's AI opt-in level, selects a model, and streams the assistant's
 * response back to the client.
 *
 * Responses:
 * - 401 when the platform requires an access token and none is supplied
 * - 400 for malformed JSON, schema-invalid bodies, invalid UI messages, or
 *   failures fetching organization details
 * - 500 for model-selection errors or unexpected failures during generation
 */
async function handlePost(req: NextApiRequest, res: NextApiResponse) {
  const authorization = req.headers.authorization
  const accessToken = authorization?.replace('Bearer ', '')

  if (IS_PLATFORM && !accessToken) {
    return res.status(401).json({ error: 'Authorization token is required' })
  }

  // The body may arrive as a raw string depending on the client. Previously a
  // malformed JSON string threw an unhandled exception (opaque 500); reject it
  // explicitly with a 400 instead.
  let body: unknown
  try {
    body = typeof req.body === 'string' ? JSON.parse(req.body) : req.body
  } catch {
    return res.status(400).json({ error: 'Invalid request body', message: 'Body is not valid JSON' })
  }

  const { data, error: parseError } = requestBodySchema.safeParse(body)

  if (parseError) {
    return res.status(400).json({ error: 'Invalid request body', issues: parseError.issues })
  }

  const {
    messages: rawMessages,
    projectRef,
    connectionString,
    orgSlug,
    chatName,
    model: requestedModel,
  } = data

  // Structural validation of the chat messages via the AI SDK's UI-message validator.
  const messagesValidation = await safeValidateUIMessages({ messages: rawMessages })
  if (!messagesValidation.success) {
    return res
      .status(400)
      .json({ error: 'Invalid request body', message: messagesValidation.error.message })
  }
  const messages = messagesValidation.data

  // Default to no AI access; self-hosted installs get schema-level access
  // unconditionally, platform installs resolve it from the organization below.
  let aiOptInLevel: AiOptInLevel = 'disabled'
  let isLimited = false

  if (!IS_PLATFORM) {
    aiOptInLevel = 'schema'
  }

  if (IS_PLATFORM && orgSlug && authorization && projectRef) {
    try {
      // Get organizations and compute opt in level server-side
      const { aiOptInLevel: orgAIOptInLevel, isLimited: orgAILimited } = await getOrgAIDetails({
        orgSlug,
        authorization,
        projectRef,
      })

      aiOptInLevel = orgAIOptInLevel
      isLimited = orgAILimited
    } catch (error) {
      return res.status(400).json({
        error: 'There was an error fetching your organization details',
      })
    }
  }

  const {
    model,
    error: modelError,
    promptProviderOptions,
    providerOptions,
  } = await getModel({
    provider: 'openai',
    model: requestedModel ?? 'gpt-5',
    // Stable routing key so requests for the same project land consistently.
    routingKey: projectRef,
    isLimited,
  })

  if (modelError) {
    return res.status(500).json({ error: modelError.message })
  }

  try {
    // Abort generation when the client disconnects mid-stream.
    const abortController = new AbortController()
    req.on('close', () => abortController.abort())
    req.on('aborted', () => abortController.abort())

    const tools = await getTools({
      projectRef,
      connectionString,
      authorization,
      aiOptInLevel,
      accessToken,
    })

    // Get a list of all schemas to add to context
    const getSchemas = async (): Promise<string> => {
      const pgMetaSchemasList = pgMeta.schemas.list()

      type Schemas = z.infer<(typeof pgMetaSchemasList)['zod']>

      const { result: schemas } = await executeSql<Schemas>(
        {
          projectRef,
          connectionString,
          sql: pgMetaSchemasList.sql,
        },
        undefined,
        {
          'Content-Type': 'application/json',
          ...(authorization && { Authorization: authorization }),
        },
        // Self-hosted installs query the database directly instead of via the platform API.
        IS_PLATFORM ? undefined : executeQuery
      )

      return schemas?.length > 0
        ? `The available database schema names are: ${JSON.stringify(schemas)}`
        : "You don't have access to any schemas."
    }

    const result = await generateAssistantResponse({
      messages,
      model,
      tools,
      aiOptInLevel,
      // Only expose schema context when the org has opted in to some AI level.
      getSchemas: aiOptInLevel !== 'disabled' ? getSchemas : undefined,
      projectRef,
      chatName,
      promptProviderOptions,
      providerOptions,
      abortSignal: abortController.signal,
    })

    result.pipeUIMessageStreamToResponse(res, {
      sendReasoning: true,
      // Normalize arbitrary thrown values into a string for the stream consumer.
      onError: (error) => {
        if (error == null) {
          return 'unknown error'
        }
        if (typeof error === 'string') {
          return error
        }
        if (error instanceof Error) {
          return error.message
        }
        return JSON.stringify(error)
      },
    })
  } catch (error) {
    console.error('Error in handlePost:', error)
    if (error instanceof Error) {
      return res.status(500).json({ message: error.message })
    }
    return res.status(500).json({ message: 'An unexpected error occurred.' })
  }
}