Files
supabase/apps/studio/lib/ai/tools/studio-tools.test.ts
Matt Rossman 82deff37de feat(assistant): lazy load topic knowledge via load_knowledge tool (#44296)
Moves knowledge (RLS, Edge Functions, PostgreSQL best practices,
Realtime) out of the static system prompt and into a `load_knowledge`
tool the model calls on demand, reducing prompt bloat. This is a
temporary stopgap until the [standard Supabase
agent-skills](https://github.com/supabase/agent-skills) are ready for
integration in Assistant.

- New always-available `load_knowledge` tool added to
`rendering-tools.ts`
- Updated `Message.Parts.tsx` so the "Ran load_knowledge" chip renders
in chat
- System prompt replaces the four knowledge blobs with an `## Available
Knowledge` block and is hardened to load knowledge for given topics
- New "Knowledge Usage" scorer and `requiredKnowledge` assertions check
that knowledge loads as expected in test scenarios
- Filters GraphQL error responses out of `output.docs` before
faithfulness scoring to reduce noise


See "Knowledge Usage" scoring 100% in evals with no major regressions:
https://github.com/supabase/supabase/pull/44296#issuecomment-4145760236

Sample trace showing the tool in action
([Braintrust](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=project_logs&object_id=5a8d02e5-b3b6-40cc-ba76-ecee286478f4&r=351a11c8-9cb7-4945-93ad-d11e8cc2e3e1&s=351a11c8-9cb7-4945-93ad-d11e8cc2e3e1))

<img width="2192" height="1730" alt="CleanShot 2026-03-30 at 13 53
59@2x"
src="https://github.com/user-attachments/assets/f483767c-34e0-401c-8089-5b9834fe696a"
/>


**References**
- https://ai-sdk.dev/cookbook/guides/agent-skills

Closes AI-508

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
  * Added dynamic knowledge loading capability enabling the AI assistant
    to retrieve on-demand information about PostgreSQL best practices, Row
    Level Security, Edge Functions, and Realtime.

* **Bug Fixes**
  * Improved search results filtering to exclude error responses in tool
    outputs.

* **Tests**
  * Enhanced evaluation metrics with knowledge usage scoring.
  * Expanded test dataset cases to validate knowledge requirement
    handling.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2026-04-02 16:09:06 -04:00

143 lines
4.7 KiB
TypeScript

import { describe, expect, it } from 'vitest'
import { getStudioTools } from './studio-tools'
describe('ai/tools/studio-tools', () => {
describe('getStudioTools', () => {
// Smoke test: getStudioTools() must yield a defined value whose typeof is 'object'.
it('should return an object with tool definitions', () => {
  const studioTools = getStudioTools()

  expect(studioTools).toBeDefined()
  expect(typeof studioTools).toBe('object')
})
// The execute_sql entry must be present and its description must mention executing a SQL statement.
it('should include execute_sql tool', () => {
  const { execute_sql: executeSqlTool } = getStudioTools()

  expect(executeSqlTool).toBeDefined()
  expect(executeSqlTool.description).toContain('execute a SQL statement')
})
// The deploy_edge_function entry must be present and its description must mention deploying an Edge Function.
it('should include deploy_edge_function tool', () => {
  const { deploy_edge_function: deployEdgeFunctionTool } = getStudioTools()

  expect(deployEdgeFunctionTool).toBeDefined()
  expect(deployEdgeFunctionTool.description).toContain('deploy a Supabase Edge Function')
})
// The rename_chat entry must be present and its description must mention renaming the chat session.
it('should include rename_chat tool', () => {
  const { rename_chat: renameChatTool } = getStudioTools()

  expect(renameChatTool).toBeDefined()
  expect(renameChatTool.description).toContain('Rename the current chat session')
})
// Pins the full tool roster: the key count is checked first, then each expected name.
it('should have exactly 4 tools', () => {
  const registeredNames = Object.keys(getStudioTools())
  const expectedNames = ['load_knowledge', 'execute_sql', 'deploy_edge_function', 'rename_chat']

  expect(registeredNames).toHaveLength(4)
  for (const expectedName of expectedNames) {
    expect(registeredNames).toContain(expectedName)
  }
})
// Checks that execute_sql exposes an input schema and that the schema reports itself as a
// Zod object. Only the schema's kind is verified here, not its individual fields.
it('should have execute_sql with correct input schema fields', () => {
  const { inputSchema } = getStudioTools().execute_sql

  expect(inputSchema).toBeDefined()
  // Read `_def.typeName` through a property-path matcher: no `as any` cast is needed, and a
  // missing `_def` surfaces as an assertion failure instead of a TypeError.
  expect(inputSchema).toHaveProperty('_def.typeName', 'ZodObject')
})
// Checks that deploy_edge_function exposes an input schema and that the schema reports
// itself as a Zod object.
it('should have deploy_edge_function with input schema', () => {
  const { inputSchema } = getStudioTools().deploy_edge_function

  expect(inputSchema).toBeDefined()
  // Read `_def.typeName` through a property-path matcher: no `as any` cast is needed, and a
  // missing `_def` surfaces as an assertion failure instead of a TypeError.
  expect(inputSchema).toHaveProperty('_def.typeName', 'ZodObject')
})
// rename_chat must carry an execute handler; invoking it is expected to resolve to the
// status payload asserted at the end.
it('should have rename_chat with execute function', async () => {
  const { rename_chat: renameChatTool } = getStudioTools()

  expect(renameChatTool.execute).toBeDefined()
  expect(typeof renameChatTool.execute).toBe('function')

  // Explicit guard so the invocation below type-checks against an optional `execute`.
  if (!renameChatTool.execute) throw new Error('execute is undefined')

  const outcome = await renameChatTool.execute(
    { newName: 'Test Chat' },
    { toolCallId: 'test', messages: [] }
  )

  expect(outcome).toEqual({ status: 'Chat request sent to client' })
})
// Exercises the execute_sql input schema with two complete payloads (table view and chart
// view) and one payload that omits required fields.
it('should validate execute_sql input schema correctly', () => {
  const schema = getStudioTools().execute_sql.inputSchema

  // Fail loudly when the schema cannot be parsed against. Previously this case fell through
  // to a bare `toBeDefined()` check, so the test passed without validating anything.
  if (!('safeParse' in schema)) {
    throw new Error('execute_sql inputSchema does not expose safeParse')
  }

  // Complete payload using the table view.
  const validInput = {
    sql: 'SELECT * FROM users',
    label: 'Get users',
    chartConfig: { view: 'table' as const },
    isWriteQuery: false,
  }
  expect(schema.safeParse(validInput).success).toBe(true)

  // Complete payload using the chart view with axis names.
  const validChartInput = {
    sql: 'SELECT count(*) FROM users',
    label: 'User count',
    chartConfig: { view: 'chart' as const, xAxis: 'date', yAxis: 'count' },
    isWriteQuery: false,
  }
  expect(schema.safeParse(validChartInput).success).toBe(true)

  // Only `sql` is supplied; label, chartConfig and isWriteQuery are missing.
  const invalidInput = {
    sql: 'SELECT * FROM users',
  }
  expect(schema.safeParse(invalidInput).success).toBe(false)
})
// Exercises the rename_chat input schema with one valid payload and two invalid ones
// (missing `newName`, and `newName` of the wrong type).
it('should validate rename_chat input schema correctly', () => {
  const schema = getStudioTools().rename_chat.inputSchema

  // Fail loudly when the schema cannot be parsed against. Previously this case fell through
  // to a bare `toBeDefined()` check, so the test passed without validating anything.
  if (!('safeParse' in schema)) {
    throw new Error('rename_chat inputSchema does not expose safeParse')
  }

  // Valid input
  expect(schema.safeParse({ newName: 'My Chat' }).success).toBe(true)

  // Invalid input - missing newName
  expect(schema.safeParse({}).success).toBe(false)

  // Invalid input - wrong type
  expect(schema.safeParse({ newName: 123 }).success).toBe(false)
})
})
})