mirror of
https://github.com/supabase/supabase.git
synced 2026-05-08 15:57:47 +08:00
Moves knowledge (RLS, Edge Functions, PostgreSQL best practices, Realtime) out of the static system prompt and into a `load_knowledge` tool the model calls on demand, reducing prompt bloat. This is a temporary stopgap until the [standard Supabase agent-skills](https://github.com/supabase/agent-skills) are ready for integration in Assistant.

- New always-available `load_knowledge` tool added to `rendering-tools.ts`
- Updated `Message.Parts.tsx` so the "Ran load_knowledge" chip renders in chat
- System prompt replaces the four knowledge blobs with an `## Available Knowledge` block and is hardened to load knowledge for given topics
- New "Knowledge Usage" scorer and `requiredKnowledge` assertions check that knowledge loads as expected in test scenarios
- Filters GraphQL error responses out of `output.docs` before faithfulness scoring to reduce noise

See "Knowledge Usage" scoring 100% in evals with no major regressions: https://github.com/supabase/supabase/pull/44296#issuecomment-4145760236

Sample trace showing the tool in action ([Braintrust](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=project_logs&object_id=5a8d02e5-b3b6-40cc-ba76-ecee286478f4&r=351a11c8-9cb7-4945-93ad-d11e8cc2e3e1&s=351a11c8-9cb7-4945-93ad-d11e8cc2e3e1))

<img width="2192" height="1730" alt="CleanShot 2026-03-30 at 13 53 59@2x" src="https://github.com/user-attachments/assets/f483767c-34e0-401c-8089-5b9834fe696a" />

**References**

- https://ai-sdk.dev/cookbook/guides/agent-skills

Closes AI-508

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Added dynamic knowledge loading capability enabling the AI assistant to retrieve on-demand information about PostgreSQL best practices, Row Level Security, Edge Functions, and Realtime.
* **Bug Fixes**
  * Improved search results filtering to exclude error responses in tool outputs.
* **Tests**
  * Enhanced evaluation metrics with knowledge usage scoring.
  * Expanded test dataset cases to validate knowledge requirement handling.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
143 lines
4.7 KiB
TypeScript
143 lines
4.7 KiB
TypeScript
import { describe, expect, it } from 'vitest'
|
|
|
|
import { getStudioTools } from './studio-tools'
|
|
|
|
/**
 * Tests for `getStudioTools`: which tools are exposed, what their descriptions
 * say, and which inputs their schemas accept or reject.
 */
describe('ai/tools/studio-tools', () => {
  describe('getStudioTools', () => {
    it('should return an object with tool definitions', () => {
      const tools = getStudioTools()

      expect(tools).toBeDefined()
      expect(typeof tools).toBe('object')
    })

    it('should include execute_sql tool', () => {
      const tools = getStudioTools()

      expect(tools.execute_sql).toBeDefined()
      expect(tools.execute_sql.description).toContain('execute a SQL statement')
    })

    it('should include deploy_edge_function tool', () => {
      const tools = getStudioTools()

      expect(tools.deploy_edge_function).toBeDefined()
      expect(tools.deploy_edge_function.description).toContain('deploy a Supabase Edge Function')
    })

    it('should include rename_chat tool', () => {
      const tools = getStudioTools()

      expect(tools.rename_chat).toBeDefined()
      expect(tools.rename_chat.description).toContain('Rename the current chat session')
    })

    it('should have exactly 4 tools', () => {
      const toolNames = Object.keys(getStudioTools())

      // The length check catches unexpected additions; the name checks catch renames.
      expect(toolNames).toHaveLength(4)
      expect(toolNames).toContain('load_knowledge')
      expect(toolNames).toContain('execute_sql')
      expect(toolNames).toContain('deploy_edge_function')
      expect(toolNames).toContain('rename_chat')
    })

    it('should have execute_sql with correct input schema fields', () => {
      const schema = getStudioTools().execute_sql.inputSchema

      // Verify the schema exists and is a Zod object
      expect(schema).toBeDefined()
      expect(schema).toHaveProperty('_def.typeName', 'ZodObject')
    })

    it('should have deploy_edge_function with input schema', () => {
      const schema = getStudioTools().deploy_edge_function.inputSchema

      // Verify the schema exists and is a Zod object
      expect(schema).toBeDefined()
      expect(schema).toHaveProperty('_def.typeName', 'ZodObject')
    })

    it('should have rename_chat with execute function', async () => {
      const renameTool = getStudioTools().rename_chat

      expect(renameTool.execute).toBeDefined()
      expect(typeof renameTool.execute).toBe('function')

      // Narrow away `undefined` so the call below type-checks
      if (!renameTool.execute) throw new Error('execute is undefined')

      const result = await renameTool.execute(
        { newName: 'Test Chat' },
        { toolCallId: 'test', messages: [] }
      )
      expect(result).toEqual({ status: 'Chat request sent to client' })
    })

    it('should validate execute_sql input schema correctly', () => {
      const schema = getStudioTools().execute_sql.inputSchema

      // Fail loudly instead of passing vacuously when the schema cannot be exercised
      if (!('safeParse' in schema)) {
        throw new Error('execute_sql inputSchema does not expose safeParse')
      }

      // Valid input
      const validInput = {
        sql: 'SELECT * FROM users',
        label: 'Get users',
        chartConfig: { view: 'table' as const },
        isWriteQuery: false,
      }
      expect(schema.safeParse(validInput).success).toBe(true)

      // Valid chart config
      const validChartInput = {
        sql: 'SELECT count(*) FROM users',
        label: 'User count',
        chartConfig: { view: 'chart' as const, xAxis: 'date', yAxis: 'count' },
        isWriteQuery: false,
      }
      expect(schema.safeParse(validChartInput).success).toBe(true)

      // Missing required fields: label, chartConfig, isWriteQuery
      const invalidInput = {
        sql: 'SELECT * FROM users',
      }
      expect(schema.safeParse(invalidInput).success).toBe(false)
    })

    it('should validate rename_chat input schema correctly', () => {
      const schema = getStudioTools().rename_chat.inputSchema

      // Fail loudly instead of passing vacuously when the schema cannot be exercised
      if (!('safeParse' in schema)) {
        throw new Error('rename_chat inputSchema does not expose safeParse')
      }

      // Valid input
      expect(schema.safeParse({ newName: 'My Chat' }).success).toBe(true)

      // Invalid input - missing newName
      expect(schema.safeParse({}).success).toBe(false)

      // Invalid input - wrong type
      expect(schema.safeParse({ newName: 123 }).success).toBe(false)
    })
  })
})
|