From 4b8bab4d146f504cf2d17169db067ed55b3ff94a Mon Sep 17 00:00:00 2001 From: Matt Rossman <22670878+mattrossman@users.noreply.github.com> Date: Fri, 30 Jan 2026 09:53:21 -0500 Subject: [PATCH] feat(assistant): score URL validity and fix support ticket URL guidance (#42227) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Logic changes** - Adds function in `helpers.ts` to extract URLs from text via regex - I also considered using a library like [linkify-it](https://www.npmjs.com/package/linkify-it) for this but figured it's not worth the extra dep - Adds associated tests in `helpers.test.ts` - Adds "URL Validity" scorer which performs a HEAD request for links in Assistant response text and determines what portion of links have `.ok` responses - Adds eval case to check correctness of support ticket URL answers **Prompt changes** - Informs Assistant of https://supabase.com/dashboard/support/new being the URL to create support tickets - Encourages Assistant to "self-debug" issues before directing users to create support tickets See [Eval Report](https://github.com/supabase/supabase/pull/42227#issuecomment-3807772871) and [Correctness](https://www.braintrust.dev/app/supabase.io/p/Assistant/trace?object_type=experiment&object_id=1ad0f9b0-5adb-436c-9812-a87aac62c036&r=1ef13459-a98c-4904-925e-6d81276cebb2&s=dbe5c607-a560-462b-8745-41d430744431) analysis for new support ticket test case. Resolves AI-384 ## Summary by CodeRabbit * **New Features** * Added URL validity scoring to evaluations and helper utilities for extracting/cleaning URLs. * Added evaluation cases for support-ticket URL handling and OAuth callback guidance. * **Documentation** * Updated assistant guidance to prefer self-resolution, include support-ticket direction, clarified data-recovery search steps, and added template-URL notation. * **Tests** * Expanded URL extraction and related utility tests to cover many formats and edge cases. 
✏️ Tip: You can customize this high-level summary in your review settings. --- apps/studio/evals/assistant.eval.ts | 2 + apps/studio/evals/dataset.ts | 21 +++ apps/studio/evals/scorer.ts | 42 +++++- apps/studio/lib/ai/prompts.ts | 4 + apps/studio/lib/helpers.test.ts | 196 +++++++++++++++++++++++++++- apps/studio/lib/helpers.ts | 56 ++++++++ 6 files changed, 317 insertions(+), 4 deletions(-) diff --git a/apps/studio/evals/assistant.eval.ts b/apps/studio/evals/assistant.eval.ts index 650dbe2e525..9f069d54f51 100644 --- a/apps/studio/evals/assistant.eval.ts +++ b/apps/studio/evals/assistant.eval.ts @@ -13,6 +13,7 @@ import { sqlIdentifierQuotingScorer, sqlSyntaxScorer, toolUsageScorer, + urlValidityScorer, } from './scorer' import { ToolSet, TypedToolCall, TypedToolResult } from 'ai' @@ -83,6 +84,7 @@ Eval('Assistant', { completenessScorer, docsFaithfulnessScorer, correctnessScorer, + urlValidityScorer, ], }) diff --git a/apps/studio/evals/dataset.ts b/apps/studio/evals/dataset.ts index a73e8638ba5..10652dd6e9b 100644 --- a/apps/studio/evals/dataset.ts +++ b/apps/studio/evals/dataset.ts @@ -106,4 +106,25 @@ export const dataset: AssistantEvalCase[] = [ description: 'Invokes `execute_sql` from default "Generate sample data" prompt', }, }, + { + input: { prompt: 'Where can I go to create a support ticket?' }, + expected: { + correctAnswer: 'https://supabase.com/dashboard/support/new', + }, + metadata: { + category: ['general_help'], + description: 'Verifies AI provides valid support ticket URL', + }, + }, + { + input: { prompt: 'What is my OAuth callback URL for setting up GitHub authentication?' 
}, + expected: { + requiredTools: ['search_docs'], + }, + metadata: { + category: ['general_help'], + description: + 'Verifies template URLs like https://.supabase.co/auth/v1/callback are excluded from URL validity scoring', + }, + }, ] diff --git a/apps/studio/evals/scorer.ts b/apps/studio/evals/scorer.ts index 6a9a0e1ac48..fae806851dc 100644 --- a/apps/studio/evals/scorer.ts +++ b/apps/studio/evals/scorer.ts @@ -2,10 +2,10 @@ import { FinishReason } from 'ai' import { LLMClassifierFromTemplate } from 'autoevals' import { EvalCase, EvalScorer } from 'braintrust' import { stripIndent } from 'common-tags' -import { parse } from 'libpg-query' -import { MOCK_TABLES_DATA } from 'lib/ai/tools/mock-tools' +import { extractUrls } from 'lib/helpers' import { extractIdentifiers } from 'lib/sql-identifier-quoting' import { isQuotedInSql, needsQuoting } from 'lib/sql-identifier-quoting' +import { parse } from 'libpg-query' const LLM_AS_A_JUDGE_MODEL = 'gpt-5.2-2025-12-11' @@ -323,3 +323,41 @@ export const sqlIdentifierQuotingScorer: EvalScorer = a metadata: errors.length > 0 ? { errors } : undefined, } } + +export const urlValidityScorer: EvalScorer = async ({ output }) => { + const responseText = extractTextOnly(output.steps) + const urls = extractUrls(responseText, { excludeCodeBlocks: true, excludeTemplates: true }) + + // Skip if no URLs found + if (urls.length === 0) { + return null + } + + const errors: string[] = [] + let validUrls = 0 + + for (const url of urls) { + try { + const response = await fetch(url, { method: 'HEAD', signal: AbortSignal.timeout(5000) }) + if (response.ok) { + validUrls++ + } else { + errors.push(`${url} returned ${response.status}`) + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + errors.push(`${url} failed: ${errorMessage}`) + } + } + + const metadata = { + urls, + errors: errors.length > 0 ? 
errors : undefined, + } + + return { + name: 'URL Validity', + score: validUrls / urls.length, + metadata, + } +} diff --git a/apps/studio/lib/ai/prompts.ts b/apps/studio/lib/ai/prompts.ts index 9c719bca9ec..d212c09bb91 100644 --- a/apps/studio/lib/ai/prompts.ts +++ b/apps/studio/lib/ai/prompts.ts @@ -611,6 +611,7 @@ export const CHAT_PROMPT = ` - When invoking a tool, call it directly without pausing. - Provide succinct outputs unless the complexity of the user request requires additional explanation. - Be confident in your responses and tool calling +- When referencing template URLs with placeholders, use angle bracket syntax (e.g., \`https://.supabase.co\`) ## Chat Naming - At the start of each conversation, if the chat is unnamed, call \`rename_chat\` with a succinct 2–4 word descriptive name (e.g., "User Authentication Setup", "Sales Data Analysis", "Product Table Creation"). @@ -636,6 +637,9 @@ export const CHAT_PROMPT = ` - To check organization usage, use the organization's usage page. Link directly to https://supabase.com/dashboard/org/_/usage. - Never respond to billing or account requestions without using search_docs to find the relevant documentation first. - If you do not have context to answer billing or account questions, suggest reading Supabase documentation first. +## Support +- Prefer solving issues yourself before directing users to create support tickets +- If needed, direct users to create support tickets via https://supabase.com/dashboard/support/new # Data Recovery When asked about restoring/recovering deleted data: 1. 
Search docs for how deletion works for that data type (e.g., "delete storage objects", "delete database rows") to understand if recovery is possible diff --git a/apps/studio/lib/helpers.test.ts b/apps/studio/lib/helpers.test.ts index 7699fdd2310..4ce0dfe4f62 100644 --- a/apps/studio/lib/helpers.test.ts +++ b/apps/studio/lib/helpers.test.ts @@ -1,9 +1,11 @@ +import { copyToClipboard } from 'ui' import { v4 as _uuidV4 } from 'uuid' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { detectBrowser, detectOS, + extractUrls, formatBytes, formatCurrency, getDatabaseMajorVersion, @@ -20,6 +22,7 @@ import { removeCommentsFromSql, removeJSONTrailingComma, snakeToCamel, + stripMarkdownCodeBlocks, tablesToSQL, timeout, tryParseInt, @@ -27,8 +30,6 @@ import { uuidv4, } from './helpers' -import { copyToClipboard } from 'ui' - vi.mock('uuid', () => ({ v4: vi.fn(() => 'mocked-uuid'), })) @@ -311,6 +312,197 @@ describe('isValidHttpUrl', () => { }) }) +describe('extractUrls', () => { + it('should extract basic http URLs', () => { + const result = extractUrls('Visit http://example.com for more info') + expect(result).toEqual(['http://example.com']) + }) + + it('should extract basic https URLs', () => { + const result = extractUrls('Check out https://supabase.com') + expect(result).toEqual(['https://supabase.com']) + }) + + it('should extract URLs with ports', () => { + const result = extractUrls('Connect to http://localhost:3000') + expect(result).toEqual(['http://localhost:3000']) + }) + + it('should extract URLs with paths', () => { + const result = extractUrls('Go to https://example.com/path/to/page') + expect(result).toEqual(['https://example.com/path/to/page']) + }) + + it('should extract URLs with query parameters', () => { + const result = extractUrls('Visit https://example.com/search?q=test&page=1') + expect(result).toEqual(['https://example.com/search?q=test&page=1']) + }) + + it('should extract URLs with fragments', () => { + const result = 
extractUrls('See https://example.com/page#section') + expect(result).toEqual(['https://example.com/page#section']) + }) + + it('should extract URLs with complex paths, query params, and fragments', () => { + const result = extractUrls('Check https://example.com/api/v1/users?id=123&name=test#details') + expect(result).toEqual(['https://example.com/api/v1/users?id=123&name=test#details']) + }) + + it('should extract multiple URLs from text', () => { + const result = extractUrls('Visit http://example.com and https://supabase.com for more info') + expect(result).toEqual(['http://example.com', 'https://supabase.com']) + }) + + it('should remove trailing punctuation from URLs', () => { + const result = extractUrls('Visit https://example.com.') + expect(result).toEqual(['https://example.com']) + }) + + it('should remove multiple trailing punctuation marks', () => { + const result = extractUrls('Check https://example.com!!!') + expect(result).toEqual(['https://example.com']) + }) + + it('should remove trailing punctuation including parentheses', () => { + const result = extractUrls('See (https://example.com)') + expect(result).toEqual(['https://example.com']) + }) + + it('should handle URLs with trailing commas and periods', () => { + const result = extractUrls('Visit https://example.com, and https://supabase.com.') + expect(result).toEqual(['https://example.com', 'https://supabase.com']) + }) + + it('should handle URLs with subpath and markdown bolding', () => { + const result = extractUrls('Check out **https://example.com/subpath** for details') + expect(result).toEqual(['https://example.com/subpath']) + }) + + it('should return empty array when no URLs are found', () => { + const result = extractUrls('This is just plain text with no URLs') + expect(result).toEqual([]) + }) + + it('should return empty array for empty string', () => { + const result = extractUrls('') + expect(result).toEqual([]) + }) + + it('should handle URLs in parentheses', () => { + const result = 
extractUrls('Check out (https://example.com) for details') + expect(result).toEqual(['https://example.com']) + }) + + it('should be case insensitive for protocol', () => { + const result = extractUrls('Visit HTTP://EXAMPLE.COM and HTTPS://SUPABASE.COM') + expect(result).toEqual(['HTTP://EXAMPLE.COM', 'HTTPS://SUPABASE.COM']) + }) + + it('should handle URLs with special characters in path', () => { + const result = extractUrls('Visit https://example.com/path_with_underscores/file-name.txt') + expect(result).toEqual(['https://example.com/path_with_underscores/file-name.txt']) + }) + + it('should handle URLs with encoded characters', () => { + const result = extractUrls('Visit https://example.com/search?q=hello%20world') + expect(result).toEqual(['https://example.com/search?q=hello%20world']) + }) + + it('should handle URLs with subdomains', () => { + const result = extractUrls('Visit https://www.example.com and https://api.example.com') + expect(result).toEqual(['https://www.example.com', 'https://api.example.com']) + }) + + describe('with excludeCodeBlocks option', () => { + it('should exclude URLs in fenced code blocks', () => { + const text = 'Visit https://real.com\n```\nhttps://code.com\n```' + expect(extractUrls(text, { excludeCodeBlocks: true })).toEqual(['https://real.com']) + }) + + it('should exclude URLs in fenced code blocks with language specifier', () => { + const text = 'Visit https://real.com\n```sql\nSELECT * FROM https://code.com\n```' + expect(extractUrls(text, { excludeCodeBlocks: true })).toEqual(['https://real.com']) + }) + + it('should exclude URLs in inline code', () => { + const text = 'Use `https://code.com` for the endpoint, or visit https://real.com' + expect(extractUrls(text, { excludeCodeBlocks: true })).toEqual(['https://real.com']) + }) + + it('should handle multiple code blocks', () => { + const text = + 'https://first.com\n```\nhttps://code1.com\n```\nhttps://second.com\n```\nhttps://code2.com\n```' + expect(extractUrls(text, { 
excludeCodeBlocks: true })).toEqual([ + 'https://first.com', + 'https://second.com', + ]) + }) + + it('should not exclude code blocks by default', () => { + const text = 'Visit https://real.com\n```\nhttps://code.com\n```' + expect(extractUrls(text)).toEqual(['https://real.com', 'https://code.com']) + }) + }) + + describe('with excludeTemplates option', () => { + it('should not extract URLs with angle brackets in subdomain', () => { + // Angle brackets in subdomain prevent the URL from being extracted at all + const text = 'Visit https://real.com or https://.supabase.co' + expect(extractUrls(text, { excludeTemplates: true })).toEqual(['https://real.com']) + }) + + it('should exclude URLs truncated at angle brackets in path', () => { + // The regex stops at angle brackets - exclude the whole truncated URL + const text = 'Visit https://real.com or https://example.com/api//data' + expect(extractUrls(text, { excludeTemplates: true })).toEqual(['https://real.com']) + }) + + it('should keep URLs without angle brackets', () => { + const text = 'Visit https://example.com/path_with_underscores' + expect(extractUrls(text, { excludeTemplates: true })).toEqual([ + 'https://example.com/path_with_underscores', + ]) + }) + }) + + describe('with both options', () => { + it('should exclude both code blocks and template URLs', () => { + const text = + 'Visit https://real.com\n```\nhttps://code.com\n```\nOr https://.supabase.co' + expect(extractUrls(text, { excludeCodeBlocks: true, excludeTemplates: true })).toEqual([ + 'https://real.com', + ]) + }) + }) +}) + +describe('stripMarkdownCodeBlocks', () => { + it('should remove fenced code blocks', () => { + const text = 'Before\n```\ncode here\n```\nAfter' + expect(stripMarkdownCodeBlocks(text)).toBe('Before\n\nAfter') + }) + + it('should remove fenced code blocks with language specifier', () => { + const text = 'Before\n```typescript\nconst x = 1;\n```\nAfter' + expect(stripMarkdownCodeBlocks(text)).toBe('Before\n\nAfter') + }) + + 
it('should remove inline code', () => { + const text = 'Use `inline code` here' + expect(stripMarkdownCodeBlocks(text)).toBe('Use here') + }) + + it('should handle multiple code blocks', () => { + const text = '```js\ncode1\n```\ntext\n```ts\ncode2\n```' + expect(stripMarkdownCodeBlocks(text)).toBe('\ntext\n') + }) + + it('should preserve text without code blocks', () => { + const text = 'Just regular text here' + expect(stripMarkdownCodeBlocks(text)).toBe('Just regular text here') + }) +}) + describe('removeCommentsFromSql', () => { it('should remove comments from SQL', () => { const result = removeCommentsFromSql(`-- This is a comment diff --git a/apps/studio/lib/helpers.ts b/apps/studio/lib/helpers.ts index 58ffb5c303d..23d7437f36d 100644 --- a/apps/studio/lib/helpers.ts +++ b/apps/studio/lib/helpers.ts @@ -262,6 +262,62 @@ export const isValidHttpUrl = (value: string) => { return url.protocol === 'http:' || url.protocol === 'https:' } +/** + * Remove markdown code blocks (fenced and inline) from text + */ +export const stripMarkdownCodeBlocks = (text: string): string => { + // Remove fenced code blocks (```...```) + const withoutFenced = text.replace(/```[\s\S]*?```/g, '') + // Remove inline code (`...`) + return withoutFenced.replace(/`[^`]+`/g, '') +} + +interface ExtractUrlsOptions { + excludeCodeBlocks?: boolean + excludeTemplates?: boolean +} + +/** + * Extract URLs from text using regex for URL detection + * Matches URLs with protocols (http/https) and common domain patterns + * @param text - The text to extract URLs from + * @param options - Optional filtering options + * @returns Array of extracted URLs with trailing punctuation removed + */ +export const extractUrls = (text: string, options?: ExtractUrlsOptions): string[] => { + const { excludeCodeBlocks = false, excludeTemplates = false } = options ?? 
{} + + let processedText = text + if (excludeCodeBlocks) { + processedText = stripMarkdownCodeBlocks(processedText) + } + + // Regex matches URLs with protocols (http/https) + // Handles: domains, ports, paths, query params, and fragments + // Pattern: https?://domain(:port)?(/path)?(?query)?(#fragment)? + const urlRegex = /https?:\/\/(?:[-\w.])+(?::\d+)?(?:\/(?:[\w\/_.~!*'();:@&=+$,?#[\]%-])*)?/gi + + const urls: string[] = [] + let match + + while ((match = urlRegex.exec(processedText)) !== null) { + // Remove trailing punctuation that might have been captured (common in text) + const url = match[0].replace(/[.,;:!?)*]+$/, '') + + if (excludeTemplates) { + // Skip URLs that were truncated at an angle bracket (template URL) + const endPos = match.index + match[0].length + if (processedText[endPos] === '<') { + continue + } + } + + urls.push(url) + } + + return urls +} + /** * Helper function to remove comments from SQL. * Disclaimer: Doesn't work as intended for nested comments.