From 5de89e48f20e731e6fe9002574e5fdb6bf3b92bb Mon Sep 17 00:00:00 2001
From: Matt Rossman <22670878+mattrossman@users.noreply.github.com>
Date: Mon, 4 May 2026 18:17:09 -0400
Subject: [PATCH] refactor(studio): address second round of review feedback on
 trace-level scorers

---
 apps/studio/evals/scorer-wasm.ts         |  4 +-
 apps/studio/evals/scorer.ts              |  7 +++-
 apps/studio/lib/ai/tools/studio-tools.ts | 52 +++++++++++++-----------
 3 files changed, 34 insertions(+), 29 deletions(-)
diff --git a/apps/studio/evals/scorer-wasm.ts b/apps/studio/evals/scorer-wasm.ts
index bf32088825..7a4798c7c0 100644
--- a/apps/studio/evals/scorer-wasm.ts
+++ b/apps/studio/evals/scorer-wasm.ts
@@ -1,12 +1,10 @@
 import { EvalScorer, Trace } from 'braintrust'
 import { parse } from 'libpg-query'
-import { z } from 'zod'
 
 import { AssistantEvalInput, AssistantEvalOutput, Expected, getToolSpans } from './scorer'
+import { executeSqlInputSchema } from '@/lib/ai/tools/studio-tools'
 import { extractIdentifiers, isQuotedInSql, needsQuoting } from '@/lib/sql-identifier-quoting'
 
-const executeSqlInputSchema = z.object({ sql: z.string() })
-
 /** Extracts SQL strings from all `execute_sql` tool spans in the trace. */
 async function getSqlQueries(trace: Trace): Promise<string[]> {
   const spans = await getToolSpans(trace, 'execute_sql')
diff --git a/apps/studio/evals/scorer.ts b/apps/studio/evals/scorer.ts
index ba0b61f884..515dde253e 100644
--- a/apps/studio/evals/scorer.ts
+++ b/apps/studio/evals/scorer.ts
@@ -4,6 +4,7 @@ import { EvalCase, EvalScorer, SpanData, Trace } from 'braintrust'
 import { stripIndent } from 'common-tags'
 import { z } from 'zod'
 
+import { loadKnowledgeInputSchema } from '@/lib/ai/tools/studio-tools'
 import { extractUrls } from '@/lib/helpers'
 
 const LLM_AS_A_JUDGE_MODEL = 'gpt-5.2' // NOTE: `gpt-5.2-2025-12-11` snapshot not yet working with online scorers
@@ -52,10 +53,10 @@ export type AssistantEvalCase = EvalCase<AssistantEvalInput, Expected, Assistant
 
 const chatMessageSchema = z.object({ role: z.string(), content: z.unknown() })
 const textContentBlockSchema = z.object({ type: z.literal('text'), text: z.string() })
-const loadKnowledgeInputSchema = z.object({ name: z.string() })
 // search_docs returns { content: [{ text: string }] } where each text is a JSON doc string
 const searchDocsOutputSchema = z.object({ content: z.array(z.object({ text: z.string() })) })
 
+/** Extracts plain text from a message content field (string or content-block array). */
 function extractMessageText(content: unknown): string {
   if (typeof content === 'string') return content
   if (!Array.isArray(content)) return ''
@@ -67,6 +68,7 @@ function extractMessageText(content: unknown): string {
     .join('\n')
 }
 
+/** Returns the text of the last assistant message in the trace thread, or null if none. */
 async function getLastAssistantText(trace: Trace): Promise<string | null> {
   const thread = await trace.getThread()
   for (let i = thread.length - 1; i >= 0; i--) {
@@ -94,6 +96,7 @@ async function getConversationContext(trace: Trace): Promise<string> {
     .join('\n\n')
 }
 
+/** Returns tool spans from the trace, optionally filtered to a specific tool name. */
 export async function getToolSpans(trace: Trace, toolName?: string): Promise<SpanData[]> {
   const spans = await trace.getSpans({ spanType: ['tool'] })
   if (!toolName) return spans
@@ -130,7 +133,7 @@ export const knowledgeUsageScorer: EvalScorer<
   if (!expected.requiredKnowledge || !trace) return null
 
   const knowledgeSpans = await getToolSpans(trace, 'load_knowledge')
-  const loadedKnowledge = knowledgeSpans.flatMap((s) => {
+  const loadedKnowledge: string[] = knowledgeSpans.flatMap((s) => {
     const r = loadKnowledgeInputSchema.safeParse(s.input)
     return r.success ? [r.data.name] : []
   })
diff --git a/apps/studio/lib/ai/tools/studio-tools.ts b/apps/studio/lib/ai/tools/studio-tools.ts
index 1279e3f12a..b5c84ba668 100644
--- a/apps/studio/lib/ai/tools/studio-tools.ts
+++ b/apps/studio/lib/ai/tools/studio-tools.ts
@@ -18,28 +18,36 @@ const KNOWLEDGE = {
 
 type KnowledgeName = keyof typeof KNOWLEDGE
 
+export const executeSqlInputSchema = z.object({
+  // Transform at parse time so toolCall.input (hence evals and logs) stores the corrected SQL.
+  // NOTE(review): re-parsing stored input (e.g. in eval scorers) re-runs it — confirm idempotent.
+  sql: z.string().describe('The SQL statement to execute.').transform(fixSqlBackslashEscapes),
+  label: z.string().describe('A short 2-4 word label for the SQL statement.'),
+  chartConfig: z
+    .object({
+      view: z.enum(['table', 'chart']).describe('How to render the results after execution'),
+      xAxis: z.string().optional().describe('The column to use for the x-axis of the chart.'),
+      yAxis: z.string().optional().describe('The column to use for the y-axis of the chart.'),
+    })
+    .describe('Chart configuration for rendering the results'),
+  isWriteQuery: z
+    .boolean()
+    .default(false)
+    .describe(
+      'Whether the SQL statement performs a write operation of any kind instead of a read operation'
+    ),
+})
+
+export const loadKnowledgeInputSchema = z.object({
+  name: z
+    .enum(Object.keys(KNOWLEDGE) as [KnowledgeName, ...KnowledgeName[]])
+    .describe('The knowledge to load'), // accepts only keys of KNOWLEDGE
+})
+
 export const getStudioTools = () => ({
   execute_sql: tool({
     description: 'Asks the user to execute a SQL statement and return the results',
-    inputSchema: z.object({
-      // Transform at parse time so the corrected SQL is what gets stored in
-      // toolCall.input — ensuring evals and logs reflect what actually runs.
-      sql: z.string().describe('The SQL statement to execute.').transform(fixSqlBackslashEscapes),
-      label: z.string().describe('A short 2-4 word label for the SQL statement.'),
-      chartConfig: z
-        .object({
-          view: z.enum(['table', 'chart']).describe('How to render the results after execution'),
-          xAxis: z.string().optional().describe('The column to use for the x-axis of the chart.'),
-          yAxis: z.string().optional().describe('The column to use for the y-axis of the chart.'),
-        })
-        .describe('Chart configuration for rendering the results'),
-      isWriteQuery: z
-        .boolean()
-        .default(false)
-        .describe(
-          'Whether the SQL statement performs a write operation of any kind instead of a read operation'
-        ),
-    }),
+    inputSchema: executeSqlInputSchema,
   }),
   deploy_edge_function: tool({
     description:
@@ -61,11 +69,7 @@ export const getStudioTools = () => ({
   load_knowledge: tool({
     description:
       'Load detailed knowledge about a Supabase topic before answering questions about it.',
-    inputSchema: z.object({
-      name: z
-        .enum(Object.keys(KNOWLEDGE) as [KnowledgeName, ...KnowledgeName[]])
-        .describe('The knowledge to load'),
-    }),
+    inputSchema: loadKnowledgeInputSchema,
     execute: ({ name }) => KNOWLEDGE[name],
   }),
 })