mirror of
https://github.com/supabase/supabase.git
synced 2026-07-01 01:25:54 +08:00
feat(assistant): score URL validity and fix support ticket URL guidance (#42227)
**Logic changes** - Adds function in `helpers.ts` to extract URLs from text via regex - I also considered using a library like [linkify-it](https://www.npmjs.com/package/linkify-it) for this but figured it's not worth the extra dep - Adds associated tests in `helpers.test.ts` - Adds "URL Validity" scorer which performs a HEAD request for links in Assistant response text and determines what portion of links have `.ok` responses - Adds eval case to check correctness of support ticket URL answers **Prompt changes** - Informs Assistant of https://supabase.com/dashboard/support/new being the URL to create support tickets - Encourages Assistant to "self-debug" issues before directing users to create support tickets See [Eval Report](https://github.com/supabase/supabase/pull/42227#issuecomment-3807772871) and [Correctness](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=experiment&object_id=1ad0f9b0-5adb-436c-9812-a87aac62c036&r=1ef13459-a98c-4904-925e-6d81276cebb2&s=dbe5c607-a560-462b-8745-41d430744431) analysis for new support ticket test case. Resolves AI-384 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added URL validity scoring to evaluations and helper utilities for extracting/cleaning URLs. * Added evaluation cases for support-ticket URL handling and OAuth callback guidance. * **Documentation** * Updated assistant guidance to prefer self-resolution, include support-ticket direction, clarified data-recovery search steps, and added template-URL notation. * **Tests** * Expanded URL extraction and related utility tests to cover many formats and edge cases. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -13,6 +13,7 @@ import {
|
||||
sqlIdentifierQuotingScorer,
|
||||
sqlSyntaxScorer,
|
||||
toolUsageScorer,
|
||||
urlValidityScorer,
|
||||
} from './scorer'
|
||||
import { ToolSet, TypedToolCall, TypedToolResult } from 'ai'
|
||||
|
||||
@@ -83,6 +84,7 @@ Eval('Assistant', {
|
||||
completenessScorer,
|
||||
docsFaithfulnessScorer,
|
||||
correctnessScorer,
|
||||
urlValidityScorer,
|
||||
],
|
||||
})
|
||||
|
||||
|
||||
@@ -106,4 +106,25 @@ export const dataset: AssistantEvalCase[] = [
|
||||
description: 'Invokes `execute_sql` from default "Generate sample data" prompt',
|
||||
},
|
||||
},
|
||||
{
|
||||
input: { prompt: 'Where can I go to create a support ticket?' },
|
||||
expected: {
|
||||
correctAnswer: 'https://supabase.com/dashboard/support/new',
|
||||
},
|
||||
metadata: {
|
||||
category: ['general_help'],
|
||||
description: 'Verifies AI provides valid support ticket URL',
|
||||
},
|
||||
},
|
||||
{
|
||||
input: { prompt: 'What is my OAuth callback URL for setting up GitHub authentication?' },
|
||||
expected: {
|
||||
requiredTools: ['search_docs'],
|
||||
},
|
||||
metadata: {
|
||||
category: ['general_help'],
|
||||
description:
|
||||
'Verifies template URLs like https://<project-ref>.supabase.co/auth/v1/callback are excluded from URL validity scoring',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@@ -2,10 +2,10 @@ import { FinishReason } from 'ai'
|
||||
import { LLMClassifierFromTemplate } from 'autoevals'
|
||||
import { EvalCase, EvalScorer } from 'braintrust'
|
||||
import { stripIndent } from 'common-tags'
|
||||
import { parse } from 'libpg-query'
|
||||
import { MOCK_TABLES_DATA } from 'lib/ai/tools/mock-tools'
|
||||
import { extractUrls } from 'lib/helpers'
|
||||
import { extractIdentifiers } from 'lib/sql-identifier-quoting'
|
||||
import { isQuotedInSql, needsQuoting } from 'lib/sql-identifier-quoting'
|
||||
import { parse } from 'libpg-query'
|
||||
|
||||
const LLM_AS_A_JUDGE_MODEL = 'gpt-5.2-2025-12-11'
|
||||
|
||||
@@ -323,3 +323,41 @@ export const sqlIdentifierQuotingScorer: EvalScorer<Input, Output, Expected> = a
|
||||
metadata: errors.length > 0 ? { errors } : undefined,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Scores the share of URLs in the assistant's response text that resolve successfully.
 *
 * URLs are pulled from the text returned by `extractTextOnly(output.steps)`, ignoring
 * markdown code and template URLs. Every URL is probed over the network and counts as
 * valid when the final response has `response.ok` set.
 *
 * @returns `null` when the response contains no URLs (nothing to score), otherwise a
 * score in `[0, 1]` with the checked URLs and any failure messages as metadata.
 */
export const urlValidityScorer: EvalScorer<Input, Output, Expected> = async ({ output }) => {
  // Upper bound for a single request so one slow host cannot stall the scorer
  const requestTimeoutMs = 5000

  /**
   * Probes one URL. Resolves to `undefined` when the URL is reachable,
   * otherwise to a human-readable failure message.
   */
  const probeUrl = async (url: string): Promise<string | undefined> => {
    try {
      let response = await fetch(url, {
        method: 'HEAD',
        signal: AbortSignal.timeout(requestTimeoutMs),
      })

      // 405/501 only say that HEAD itself is not supported, not that the link is broken,
      // so repeat the check with GET before judging the URL.
      if (response.status === 405 || response.status === 501) {
        response = await fetch(url, {
          method: 'GET',
          signal: AbortSignal.timeout(requestTimeoutMs),
        })
        // Only the status matters; release the body without letting a cancel error fail the probe
        void response.body?.cancel().catch(() => undefined)
      }

      return response.ok ? undefined : `${url} returned ${response.status}`
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error)
      return `${url} failed: ${errorMessage}`
    }
  }

  const responseText = extractTextOnly(output.steps)
  const urls = extractUrls(responseText, { excludeCodeBlocks: true, excludeTemplates: true })

  // Skip if no URLs found
  if (urls.length === 0) {
    return null
  }

  // Probes are independent, so run them concurrently; Promise.all keeps the results in URL order
  const outcomes = await Promise.all(urls.map(probeUrl))
  const errors = outcomes.filter((outcome): outcome is string => outcome !== undefined)

  const metadata = {
    urls,
    errors: errors.length > 0 ? errors : undefined,
  }

  return {
    name: 'URL Validity',
    score: (urls.length - errors.length) / urls.length,
    metadata,
  }
}
|
||||
|
||||
@@ -611,6 +611,7 @@ export const CHAT_PROMPT = `
|
||||
- When invoking a tool, call it directly without pausing.
|
||||
- Provide succinct outputs unless the complexity of the user request requires additional explanation.
|
||||
- Be confident in your responses and tool calling
|
||||
- When referencing template URLs with placeholders, use angle bracket syntax (e.g., \`https://<project-ref>.supabase.co\`)
|
||||
|
||||
## Chat Naming
|
||||
- At the start of each conversation, if the chat is unnamed, call \`rename_chat\` with a succinct 2–4 word descriptive name (e.g., "User Authentication Setup", "Sales Data Analysis", "Product Table Creation").
|
||||
@@ -636,6 +637,9 @@ export const CHAT_PROMPT = `
|
||||
- To check organization usage, use the organization's usage page. Link directly to https://supabase.com/dashboard/org/_/usage.
|
||||
- Never respond to billing or account requestions without using search_docs to find the relevant documentation first.
|
||||
- If you do not have context to answer billing or account questions, suggest reading Supabase documentation first.
|
||||
## Support
|
||||
- Prefer solving issues yourself before directing users to create support tickets
|
||||
- If needed, direct users to create support tickets via https://supabase.com/dashboard/support/new
|
||||
# Data Recovery
|
||||
When asked about restoring/recovering deleted data:
|
||||
1. Search docs for how deletion works for that data type (e.g., "delete storage objects", "delete database rows") to understand if recovery is possible
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import { copyToClipboard } from 'ui'
|
||||
import { v4 as _uuidV4 } from 'uuid'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import {
|
||||
detectBrowser,
|
||||
detectOS,
|
||||
extractUrls,
|
||||
formatBytes,
|
||||
formatCurrency,
|
||||
getDatabaseMajorVersion,
|
||||
@@ -20,6 +22,7 @@ import {
|
||||
removeCommentsFromSql,
|
||||
removeJSONTrailingComma,
|
||||
snakeToCamel,
|
||||
stripMarkdownCodeBlocks,
|
||||
tablesToSQL,
|
||||
timeout,
|
||||
tryParseInt,
|
||||
@@ -27,8 +30,6 @@ import {
|
||||
uuidv4,
|
||||
} from './helpers'
|
||||
|
||||
import { copyToClipboard } from 'ui'
|
||||
|
||||
vi.mock('uuid', () => ({
|
||||
v4: vi.fn(() => 'mocked-uuid'),
|
||||
}))
|
||||
@@ -311,6 +312,197 @@ describe('isValidHttpUrl', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('extractUrls', () => {
  // Cases run with default options: [test title, input text, expected URLs]
  const defaultOptionCases: Array<[string, string, string[]]> = [
    [
      'should extract basic http URLs',
      'Visit http://example.com for more info',
      ['http://example.com'],
    ],
    ['should extract basic https URLs', 'Check out https://supabase.com', ['https://supabase.com']],
    [
      'should extract URLs with ports',
      'Connect to http://localhost:3000',
      ['http://localhost:3000'],
    ],
    [
      'should extract URLs with paths',
      'Go to https://example.com/path/to/page',
      ['https://example.com/path/to/page'],
    ],
    [
      'should extract URLs with query parameters',
      'Visit https://example.com/search?q=test&page=1',
      ['https://example.com/search?q=test&page=1'],
    ],
    [
      'should extract URLs with fragments',
      'See https://example.com/page#section',
      ['https://example.com/page#section'],
    ],
    [
      'should extract URLs with complex paths, query params, and fragments',
      'Check https://example.com/api/v1/users?id=123&name=test#details',
      ['https://example.com/api/v1/users?id=123&name=test#details'],
    ],
    [
      'should extract multiple URLs from text',
      'Visit http://example.com and https://supabase.com for more info',
      ['http://example.com', 'https://supabase.com'],
    ],
    [
      'should remove trailing punctuation from URLs',
      'Visit https://example.com.',
      ['https://example.com'],
    ],
    [
      'should remove multiple trailing punctuation marks',
      'Check https://example.com!!!',
      ['https://example.com'],
    ],
    [
      'should remove trailing punctuation including parentheses',
      'See (https://example.com)',
      ['https://example.com'],
    ],
    [
      'should handle URLs with trailing commas and periods',
      'Visit https://example.com, and https://supabase.com.',
      ['https://example.com', 'https://supabase.com'],
    ],
    [
      'should handle URLs with subpath and markdown bolding',
      'Check out **https://example.com/subpath** for details',
      ['https://example.com/subpath'],
    ],
    [
      'should return empty array when no URLs are found',
      'This is just plain text with no URLs',
      [],
    ],
    ['should return empty array for empty string', '', []],
    [
      'should handle URLs in parentheses',
      'Check out (https://example.com) for details',
      ['https://example.com'],
    ],
    [
      'should be case insensitive for protocol',
      'Visit HTTP://EXAMPLE.COM and HTTPS://SUPABASE.COM',
      ['HTTP://EXAMPLE.COM', 'HTTPS://SUPABASE.COM'],
    ],
    [
      'should handle URLs with special characters in path',
      'Visit https://example.com/path_with_underscores/file-name.txt',
      ['https://example.com/path_with_underscores/file-name.txt'],
    ],
    [
      'should handle URLs with encoded characters',
      'Visit https://example.com/search?q=hello%20world',
      ['https://example.com/search?q=hello%20world'],
    ],
    [
      'should handle URLs with subdomains',
      'Visit https://www.example.com and https://api.example.com',
      ['https://www.example.com', 'https://api.example.com'],
    ],
  ]

  for (const [title, input, expected] of defaultOptionCases) {
    it(title, () => {
      expect(extractUrls(input)).toEqual(expected)
    })
  }

  describe('with excludeCodeBlocks option', () => {
    // Shorthand for extraction with markdown code stripped first
    const outsideCode = (input: string) => extractUrls(input, { excludeCodeBlocks: true })

    it('should exclude URLs in fenced code blocks', () => {
      expect(outsideCode('Visit https://real.com\n```\nhttps://code.com\n```')).toEqual([
        'https://real.com',
      ])
    })

    it('should exclude URLs in fenced code blocks with language specifier', () => {
      expect(
        outsideCode('Visit https://real.com\n```sql\nSELECT * FROM https://code.com\n```')
      ).toEqual(['https://real.com'])
    })

    it('should exclude URLs in inline code', () => {
      expect(
        outsideCode('Use `https://code.com` for the endpoint, or visit https://real.com')
      ).toEqual(['https://real.com'])
    })

    it('should handle multiple code blocks', () => {
      const found = outsideCode(
        'https://first.com\n```\nhttps://code1.com\n```\nhttps://second.com\n```\nhttps://code2.com\n```'
      )
      expect(found).toEqual(['https://first.com', 'https://second.com'])
    })

    it('should not exclude code blocks by default', () => {
      // No options passed: URLs inside the fence are still reported
      expect(extractUrls('Visit https://real.com\n```\nhttps://code.com\n```')).toEqual([
        'https://real.com',
        'https://code.com',
      ])
    })
  })

  describe('with excludeTemplates option', () => {
    // Shorthand for extraction with template URLs filtered out
    const withoutTemplates = (input: string) => extractUrls(input, { excludeTemplates: true })

    it('should not extract URLs with angle brackets in subdomain', () => {
      // A placeholder right after the scheme leaves no host to match, so nothing is extracted for it
      expect(
        withoutTemplates('Visit https://real.com or https://<project-ref>.supabase.co')
      ).toEqual(['https://real.com'])
    })

    it('should exclude URLs truncated at angle brackets in path', () => {
      // Matching stops at the angle bracket; the truncated URL must be dropped entirely
      expect(
        withoutTemplates('Visit https://real.com or https://example.com/api/<project-id>/data')
      ).toEqual(['https://real.com'])
    })

    it('should keep URLs without angle brackets', () => {
      expect(withoutTemplates('Visit https://example.com/path_with_underscores')).toEqual([
        'https://example.com/path_with_underscores',
      ])
    })
  })

  describe('with both options', () => {
    it('should exclude both code blocks and template URLs', () => {
      const found = extractUrls(
        'Visit https://real.com\n```\nhttps://code.com\n```\nOr https://<project-ref>.supabase.co',
        { excludeCodeBlocks: true, excludeTemplates: true }
      )
      expect(found).toEqual(['https://real.com'])
    })
  })
})
|
||||
|
||||
describe('stripMarkdownCodeBlocks', () => {
  it('should remove fenced code blocks', () => {
    const text = 'Before\n```\ncode here\n```\nAfter'
    expect(stripMarkdownCodeBlocks(text)).toBe('Before\n\nAfter')
  })

  it('should remove fenced code blocks with language specifier', () => {
    const text = 'Before\n```typescript\nconst x = 1;\n```\nAfter'
    expect(stripMarkdownCodeBlocks(text)).toBe('Before\n\nAfter')
  })

  it('should remove inline code', () => {
    const text = 'Use `inline code` here'
    // Only the backtick span is deleted, so the spaces on both sides of it remain (two spaces)
    expect(stripMarkdownCodeBlocks(text)).toBe('Use  here')
  })

  it('should handle multiple code blocks', () => {
    const text = '```js\ncode1\n```\ntext\n```ts\ncode2\n```'
    expect(stripMarkdownCodeBlocks(text)).toBe('\ntext\n')
  })

  it('should preserve text without code blocks', () => {
    const text = 'Just regular text here'
    expect(stripMarkdownCodeBlocks(text)).toBe('Just regular text here')
  })
})
|
||||
|
||||
describe('removeCommentsFromSql', () => {
|
||||
it('should remove comments from SQL', () => {
|
||||
const result = removeCommentsFromSql(`-- This is a comment
|
||||
|
||||
@@ -262,6 +262,62 @@ export const isValidHttpUrl = (value: string) => {
|
||||
return url.protocol === 'http:' || url.protocol === 'https:'
|
||||
}
|
||||
|
||||
/**
 * Deletes markdown code from a string: fenced blocks first, then inline spans.
 * Surrounding text, including the whitespace around removed code, is left untouched.
 *
 * @param text - Markdown text to clean
 * @returns The text with every fenced block and inline code span removed
 */
export const stripMarkdownCodeBlocks = (text: string): string => {
  const fencedBlock = /```[\s\S]*?```/g // ```...``` (optionally with a language tag)
  const inlineSpan = /`[^`]+`/g // `...`

  // Order matters: fences go first so their backticks are not misread as inline spans
  return [fencedBlock, inlineSpan].reduce(
    (remaining, pattern) => remaining.replace(pattern, ''),
    text
  )
}
|
||||
|
||||
interface ExtractUrlsOptions {
  /** When true, markdown code (fenced and inline) is stripped before searching for URLs */
  excludeCodeBlocks?: boolean
  /** When true, URLs that are cut short by a `<placeholder>` are left out of the result */
  excludeTemplates?: boolean
}

/**
 * Removes punctuation that belongs to the surrounding sentence rather than to the URL.
 * A closing parenthesis is only removed when it has no matching opening parenthesis
 * inside the URL, so links such as `.../Rust_(programming_language)` stay intact while
 * `(https://example.com/path)` and markdown links still lose their wrapper.
 */
const trimTrailingUrlPunctuation = (candidate: string): string => {
  let url = candidate
  // Each pass either returns or shortens `url`, so the loop always terminates
  for (;;) {
    url = url.replace(/[.,;:!?*]+$/, '')
    if (!url.endsWith(')')) {
      return url
    }
    const openingCount = url.split('(').length - 1
    const closingCount = url.split(')').length - 1
    if (openingCount >= closingCount) {
      return url
    }
    url = url.slice(0, -1)
  }
}

/**
 * Extract URLs from text using regex for URL detection
 * Matches http/https URLs made of a host, an optional port and an optional
 * path, query string and/or fragment (the query and fragment do not require a path).
 * @param text - The text to extract URLs from
 * @param options - Optional filtering options
 * @returns Array of extracted URLs, in order of appearance, with trailing punctuation removed
 */
export const extractUrls = (text: string, options?: ExtractUrlsOptions): string[] => {
  const { excludeCodeBlocks = false, excludeTemplates = false } = options ?? {}

  let processedText = text
  if (excludeCodeBlocks) {
    processedText = stripMarkdownCodeBlocks(processedText)
  }

  // Pattern: https?://domain(:port)?(/path)?(?query)?(#fragment)?
  // The tail may start with `/`, `?` or `#` so that a query or fragment directly after
  // the host is captured instead of being cut off.
  const urlRegex = /https?:\/\/[-\w.]+(?::\d+)?(?:[\/?#][\w\/_.~!*'();:@&=+$,?#[\]%-]*)?/gi

  const urls: string[] = []
  let match: RegExpExecArray | null

  while ((match = urlRegex.exec(processedText)) !== null) {
    if (excludeTemplates) {
      // Skip URLs that were truncated at an angle bracket (template URL)
      const endPos = match.index + match[0].length
      if (processedText[endPos] === '<') {
        continue
      }
    }

    // Remove trailing punctuation that might have been captured (common in text)
    urls.push(trimTrailingUrlPunctuation(match[0]))
  }

  return urls
}
|
||||
|
||||
/**
|
||||
* Helper function to remove comments from SQL.
|
||||
* Disclaimer: Doesn't work as intended for nested comments.
|
||||
|
||||
Reference in New Issue
Block a user