mirror of
https://github.com/supabase/supabase.git
synced 2026-05-06 22:18:00 +08:00
feat(assistant): online evals support and CI workflows (#43194)
Lays groundwork for online evals on Assistant chat logs. https://www.braintrust.dev/docs/observe/score-online ### Changes - New workflows: - `braintrust-scorers-deploy.yml` keeps prod scorers in sync on push to `master` - `braintrust-preview-scorers-deploy.yml` deploys preview scorers to the staging project for PRs labeled `preview-scorers`, posting a comment with scorer links ([example](https://github.com/supabase/supabase/pull/43194#issuecomment-4000097222)) - `braintrust-preview-scorers-cleanup.yml` deletes preview scorers when the PR is closed ([example](https://github.com/supabase/supabase/pull/43194#issuecomment-4000749847)) - Adds `evals/scorer-online.ts` entry point invoked with `pnpm scorers:deploy`, registering scorers for online evals in the Braintrust "Assistant" project - Refactors scorer code to separate online-compatible scorers (`scorer-online.ts`) from WASM-dependent ones (`scorer-wasm.ts`) - "URL Validity" scorer now only checks Supabase domains to prevent requests to untrusted origins - Span `input` is now shaped `{ prompt: string }` instead of plain `string` for compatibility with offline eval scorers - Env vars `BRAINTRUST_STAGING_PROJECT_ID` and `BRAINTRUST_PROJECT_ID` configured in GitHub repo settings - `generateAssistantResponse` now uses `startSpan` + `withCurrent` instead of `traced()` to manually manage the root span lifecycle — this ensures `onFinish` logs output to the span _before_ `span.end()` is called, which is when Braintrust triggers scoring automations ### Online Scorers We share scoring logic across offline and online evals, but some of our scorers aren't transferrable to an "online" setting due to runtime challenges or ground truth requirements. 
**Supported** - Goal Completion - Conciseness - Completeness - Docs Faithfulness - URL Validity **Unsupported** - Correctness (requires ground truth output) - Tool Usage (requires ground truth requiredTools) - SQL Syntax (uses libpg-query WASM) - SQL Identifier Quoting (uses libpg-query WASM) ### How to use these scorers Going forward if you want to add/edit online eval scorers, add the `preview-scorers` label to a PR. This deploys scorers to the [Assistant (Staging Scorers)](https://www.braintrust.dev/app/supabase.io/p/Assistant%20(Staging%20Scorers)?v=Overview) project in Braintrust with branch-specific slugs, and comments on the PR ([example](https://github.com/supabase/supabase/pull/43194#issuecomment-4000097222)). From the Braintrust dashboard you can "Test" the scorer with traces from any project. <img width="1866" height="528" alt="CleanShot 2026-03-05 at 15 15 00@2x" src="https://github.com/user-attachments/assets/4f15cebc-3f2d-4e8a-9ee2-fe8ef7bf4199" /> Once merged, scorers are deployed to the primary [Assistant](https://www.braintrust.dev/app/supabase.io/p/Assistant) project, and preview scorers are deleted from the staging project. Down the road, scorers on the Assistant project will run automatically on a sample of production traces. Closes AI-437
This commit is contained in:
2
.github/workflows/braintrust-evals.yml
vendored
2
.github/workflows/braintrust-evals.yml
vendored
@@ -17,7 +17,7 @@ permissions:
|
||||
jobs:
|
||||
eval:
|
||||
name: Run evals
|
||||
if: github.event_name == 'push' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-evals'))
|
||||
if: github.event_name == 'push' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-evals') && github.event.pull_request.head.repo.full_name == github.repository)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
|
||||
|
||||
65
.github/workflows/braintrust-preview-scorers-cleanup.yml
vendored
Normal file
65
.github/workflows/braintrust-preview-scorers-cleanup.yml
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
# Deletes the branch-prefixed preview scorers from the Braintrust staging project
# when a PR carrying the `preview-scorers` label is closed, then rewrites the bot
# comment on the PR to say the scorers were cleaned up.
name: Cleanup Braintrust preview scorers

on:
  pull_request:
    types: [closed]

permissions:
  pull-requests: write
  # Listing any permission sets every unlisted one to `none`; the checkout step
  # below needs read access to repository contents.
  contents: read

jobs:
  cleanup-scorers:
    name: Delete preview scorers
    # Only for labeled PRs whose head branch lives in this repository.
    if: contains(github.event.pull_request.labels.*.name, 'preview-scorers') && github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - name: Checkout
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          # Use the PR head so the manifest matches what the preview deploy pushed.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Delete preview scorers from staging
        env:
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          BRAINTRUST_STAGING_PROJECT_ID: ${{ secrets.BRAINTRUST_STAGING_PROJECT_ID }}
        run: |
          set -euo pipefail

          # Must produce the same slug prefix as evals/scorer-online.ts:
          # lowercase, with every character outside [a-z0-9-] replaced by "-".
          BRANCH_SLUG=$(echo "${GITHUB_HEAD_REF}" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g')
          readarray -t SLUGS < <(jq -r '.[].slug' apps/studio/evals/scorer-online-manifest.json)

          # Try every slug before failing so one bad request does not strand the rest.
          failed=0
          for slug in "${SLUGS[@]}"; do
            prefixed="${BRANCH_SLUG}-${slug}"

            # --fail turns HTTP errors into a non-zero exit instead of an error body
            # that jq would silently read as "not found".
            if ! response=$(curl -sS --fail "https://api.braintrust.dev/v1/function?project_id=${BRAINTRUST_STAGING_PROJECT_ID}&slug=${prefixed}" \
              -H "Authorization: Bearer ${BRAINTRUST_API_KEY}"); then
              echo "::error::Lookup failed for ${prefixed}"
              failed=1
              continue
            fi

            id=$(jq -r '.objects[0].id // empty' <<<"$response")
            if [ -z "$id" ]; then
              echo "Not found: ${prefixed} (already deleted or never deployed)"
              continue
            fi

            if curl -sS --fail -o /dev/null -X DELETE "https://api.braintrust.dev/v1/function/${id}" \
              -H "Authorization: Bearer ${BRAINTRUST_API_KEY}"; then
              echo "Deleted ${prefixed}"
            else
              echo "::error::Failed to delete ${prefixed}"
              failed=1
            fi
          done

          # A non-zero exit skips the comment step, so the PR never claims a
          # cleanup that did not happen.
          exit "$failed"

      - name: Post cleanup comment
        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        with:
          script: |
            const prNumber = context.payload.pull_request.number
            const marker = '<!-- preview-scorers-bot -->'

            // Paginate: a single listComments call returns only the first page,
            // which can miss the bot comment on a PR with many comments.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100,
            })

            // Nothing to update if the deploy workflow never commented.
            const existing = comments.find((c) => c.body?.includes(marker))
            if (!existing) return

            await github.rest.issues.updateComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: existing.id,
              body: `${marker}\n### Braintrust Preview Scorers\n\nPreview scorers have been cleaned up.`,
            })
|
||||
123
.github/workflows/braintrust-preview-scorers-deploy.yml
vendored
Normal file
123
.github/workflows/braintrust-preview-scorers-deploy.yml
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
# Deploys branch-prefixed preview scorers to the Braintrust staging project for
# PRs labeled `preview-scorers`, then creates or updates a single bot comment on
# the PR listing the deployed scorer slugs.
name: Deploy Braintrust preview scorers

on:
  pull_request:
    types: [labeled, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  pull-requests: write
  contents: read

jobs:
  push-scorers:
    name: Push preview scorers
    # Only for labeled PRs whose head branch lives in this repository.
    if: contains(github.event.pull_request.labels.*.name, 'preview-scorers') && github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Checkout
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          # Full history so the diff against origin/<base> below can find a merge base.
          fetch-depth: 0
          ref: ${{ github.head_ref || github.ref_name }}

      - name: Check for scorer file changes
        id: changed
        # On labeled events, always push. On synchronize, only push if scorer files changed.
        env:
          # Passed through env rather than interpolated into the script text, so
          # event data is never parsed as shell.
          EVENT_ACTION: ${{ github.event.action }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
        run: |
          skip=false
          if [[ "$EVENT_ACTION" == "synchronize" ]]; then
            changed=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep -E 'evals/scorer' || true)
            if [ -z "$changed" ]; then
              echo "No scorer files changed, skipping push"
              skip=true
            fi
          fi
          echo "skip=${skip}" >> "$GITHUB_OUTPUT"

      - name: Install pnpm
        if: steps.changed.outputs.skip != 'true'
        uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
        with:
          run_install: false

      - name: Use Node.js
        if: steps.changed.outputs.skip != 'true'
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
        with:
          node-version-file: ".nvmrc"
          cache: "pnpm"

      - name: Install Dependencies
        if: steps.changed.outputs.skip != 'true'
        run: pnpm install --frozen-lockfile

      - name: Push scorers to staging
        if: steps.changed.outputs.skip != 'true'
        id: push
        run: |
          cd apps/studio && pnpm scorers:deploy
          # Expose the unprefixed slugs (comma-separated) to the comment step.
          slugs=$(jq -r '[.[].slug] | join(",")' evals/scorer-online-manifest.json)
          echo "slugs=$slugs" >> "$GITHUB_OUTPUT"
        env:
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          # The deploy script reads BRAINTRUST_PROJECT_ID; previews target staging.
          BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_STAGING_PROJECT_ID }}
          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}

      - name: Post PR comment
        if: steps.changed.outputs.skip != 'true'
        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        env:
          BRAINTRUST_STAGING_PROJECT_ID: ${{ secrets.BRAINTRUST_STAGING_PROJECT_ID }}
          SCORER_SLUGS: ${{ steps.push.outputs.slugs }}
        with:
          script: |
            const prNumber = context.payload.pull_request.number
            const branch = process.env.GITHUB_HEAD_REF
            // Same slug prefix rule as evals/scorer-online.ts.
            const prefix = branch.replace(/[^a-z0-9-]/gi, '-').toLowerCase()
            const stagingUrl = 'https://www.braintrust.dev/app/supabase.io/p/Assistant%20(Staging%20Scorers)/scorers'

            const slugs = process.env.SCORER_SLUGS.split(',')
            const slugList = slugs.map((s) => `- \`${prefix}-${s}\``).join('\n')

            // On pull_request events context.sha is the temporary merge commit,
            // not a commit on the PR branch; report the PR head instead.
            const sha = context.payload.pull_request.head.sha.slice(0, 7)
            const marker = '<!-- preview-scorers-bot -->'
            const body = [
              marker,
              '### Braintrust Preview Scorers',
              '',
              `Deployed scorers to [Assistant (Staging Scorers)](${stagingUrl}):`,
              '',
              slugList,
              '',
              `Commit: ${sha}`,
            ].join('\n')

            // Paginate: a single listComments call returns only the first page, so
            // an existing bot comment could be missed and a duplicate created.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100,
            })

            const existing = comments.find((c) => c.body?.includes(marker))

            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body,
              })
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body,
              })
            }
|
||||
43
.github/workflows/braintrust-scorers-deploy.yml
vendored
Normal file
43
.github/workflows/braintrust-scorers-deploy.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
# Pushes the online scorers to the primary Braintrust project whenever the
# scorer sources or the scorer manifest change on `master`.
name: Deploy Braintrust scorers

on:
  push:
    branches: [master]
    paths:
      - "apps/studio/evals/scorer.ts"
      - "apps/studio/evals/scorer-online.ts"
      - "apps/studio/evals/scorer-online-manifest.json"

# Serialize deploys: without this, two pushes in quick succession can finish out
# of order and leave the older scorer code deployed. In-progress runs are not
# cancelled so a deploy is never interrupted midway.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

permissions:
  contents: read

jobs:
  push:
    name: Push scorers
    runs-on: ubuntu-latest
    timeout-minutes: 10

    env:
      BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
      BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_PROJECT_ID }}

    steps:
      - name: Checkout
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0

      - name: Install pnpm
        uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
        with:
          run_install: false

      - name: Use Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
        with:
          node-version-file: ".nvmrc"
          cache: "pnpm"

      - name: Install Dependencies
        run: pnpm install --frozen-lockfile

      - name: Push scorers
        run: cd apps/studio && pnpm scorers:deploy
|
||||
@@ -1,21 +1,21 @@
|
||||
import assert from 'node:assert'
|
||||
import { openai } from '@ai-sdk/openai'
|
||||
import { Eval } from 'braintrust'
|
||||
import { generateAssistantResponse } from 'lib/ai/generate-assistant-response'
|
||||
import { getMockTools } from 'lib/ai/tools/mock-tools'
|
||||
import assert from 'node:assert'
|
||||
|
||||
import { dataset } from './dataset'
|
||||
import { buildAssistantEvalOutput } from './output'
|
||||
import {
|
||||
completenessScorer,
|
||||
concisenessScorer,
|
||||
correctnessScorer,
|
||||
docsFaithfulnessScorer,
|
||||
goalCompletionScorer,
|
||||
sqlIdentifierQuotingScorer,
|
||||
sqlSyntaxScorer,
|
||||
toolUsageScorer,
|
||||
urlValidityScorer,
|
||||
} from './scorer'
|
||||
import { ToolSet, TypedToolCall, TypedToolResult } from 'ai'
|
||||
import { sqlIdentifierQuotingScorer, sqlSyntaxScorer } from './scorer-wasm'
|
||||
|
||||
assert(process.env.BRAINTRUST_PROJECT_ID, 'BRAINTRUST_PROJECT_ID is not set')
|
||||
assert(process.env.OPENAI_API_KEY, 'OPENAI_API_KEY is not set')
|
||||
@@ -31,50 +31,10 @@ Eval('Assistant', {
|
||||
tools: await getMockTools(input.mockTables ? { list_tables: input.mockTables } : undefined),
|
||||
})
|
||||
|
||||
const finishReason = await result.finishReason
|
||||
|
||||
// `result.toolCalls` only shows the last step, instead aggregate tools across all steps
|
||||
const steps = await result.steps
|
||||
const [finishReason, steps] = await Promise.all([result.finishReason, result.steps])
|
||||
|
||||
const simplifiedSteps = steps.map((step) => ({
|
||||
text: step.text,
|
||||
toolCalls: step.toolCalls.map((call) => ({
|
||||
toolName: call.toolName,
|
||||
input: call.input,
|
||||
})),
|
||||
}))
|
||||
|
||||
const toolNames: string[] = []
|
||||
const sqlQueries: string[] = []
|
||||
const docs: string[] = []
|
||||
|
||||
for (const step of steps) {
|
||||
for (const [i, toolCall] of step.toolCalls.entries()) {
|
||||
toolNames.push(toolCall.toolName)
|
||||
|
||||
const toolResult = step.toolResults.at(i)
|
||||
if (!toolResult) {
|
||||
continue
|
||||
}
|
||||
|
||||
const parsed = parseToolCall(toolCall, toolResult)
|
||||
|
||||
if (parsed.sqlQuery) {
|
||||
sqlQueries.push(parsed.sqlQuery)
|
||||
}
|
||||
if (parsed.docs) {
|
||||
docs.push(...parsed.docs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
finishReason,
|
||||
steps: simplifiedSteps,
|
||||
toolNames,
|
||||
sqlQueries,
|
||||
docs,
|
||||
}
|
||||
return buildAssistantEvalOutput(finishReason, steps)
|
||||
},
|
||||
scores: [
|
||||
toolUsageScorer,
|
||||
@@ -88,45 +48,3 @@ Eval('Assistant', {
|
||||
urlValidityScorer,
|
||||
],
|
||||
})
|
||||
|
||||
type ParsedToolCall = {
|
||||
/** Query generated by `execute_sql` */
|
||||
sqlQuery?: string
|
||||
|
||||
/** Docs text pulled in from `search_docs` */
|
||||
docs?: string[]
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and extract relevant info from a tool call/result
|
||||
*/
|
||||
function parseToolCall(
|
||||
toolCall: TypedToolCall<ToolSet>,
|
||||
toolResult: TypedToolResult<ToolSet>
|
||||
): ParsedToolCall {
|
||||
switch (toolCall.toolName) {
|
||||
case 'execute_sql': {
|
||||
const sqlQuery = toolCall.input.sql
|
||||
if (typeof sqlQuery !== 'string') {
|
||||
return {}
|
||||
}
|
||||
|
||||
return { sqlQuery }
|
||||
}
|
||||
case 'search_docs': {
|
||||
const content = toolResult.output.content
|
||||
if (!content || !Array.isArray(content)) {
|
||||
return {}
|
||||
}
|
||||
|
||||
const docs = content.map((item) => item?.text).filter((text) => typeof text === 'string')
|
||||
if (docs.length === 0) {
|
||||
return {}
|
||||
}
|
||||
|
||||
return { docs }
|
||||
}
|
||||
}
|
||||
|
||||
return {}
|
||||
}
|
||||
|
||||
66
apps/studio/evals/output.ts
Normal file
66
apps/studio/evals/output.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { type ToolSet, type TypedToolCall, type TypedToolResult } from 'ai'
|
||||
import { type AssistantEvalOutput } from './scorer'
|
||||
|
||||
/** The subset of a generation step that `buildAssistantEvalOutput` reads. */
type Step = {
  /** Text recorded for the step. */
  text: string
  /** Tool calls recorded for the step. */
  toolCalls: Array<TypedToolCall<ToolSet>>
  /** Tool results recorded for the step. */
  toolResults: Array<TypedToolResult<ToolSet>>
}
|
||||
|
||||
/** Data extracted from one tool call/result pair; every field is optional. */
type ParsedToolCall = Partial<{
  /** SQL text taken from the input of an `execute_sql` call. */
  sqlQuery: string
  /** Text entries taken from the output of a `search_docs` call. */
  docs: string[]
}>
|
||||
|
||||
/**
 * Validate a tool call/result pair and extract the fields the scorers use.
 *
 * - `execute_sql`: returns `{ sqlQuery }` when the call input has a string `sql`.
 * - `search_docs`: returns `{ docs }` with every string `text` found in the
 *   result's `output.content` array, when there is at least one.
 *
 * Returns an empty object for any other tool or when validation fails.
 */
function parseToolCall(
  toolCall: TypedToolCall<ToolSet>,
  toolResult: TypedToolResult<ToolSet>
): ParsedToolCall {
  if (toolCall.toolName === 'execute_sql') {
    const sql = toolCall.input?.sql
    return typeof sql === 'string' ? { sqlQuery: sql } : {}
  }

  if (toolCall.toolName === 'search_docs') {
    const content = toolResult.output?.content
    // Array.isArray is already false for null/undefined, so one check suffices.
    if (!Array.isArray(content)) return {}

    const texts = content.map((item) => item?.text).filter((text) => typeof text === 'string')
    return texts.length > 0 ? { docs: texts } : {}
  }

  return {}
}
|
||||
|
||||
/**
 * Build the scorer-facing eval output from a finished assistant generation.
 *
 * Aggregates across all steps:
 * - `steps`: each step reduced to its text plus `{ toolName, input }` per tool call
 * - `toolNames`: the name of every tool call, in call order
 * - `sqlQueries`: SQL extracted from `execute_sql` calls
 * - `docs`: doc texts extracted from `search_docs` results
 *
 * @param finishReason Finish reason of the generation, passed through unchanged.
 * @param steps Generation steps to aggregate.
 * @returns The aggregated output consumed by the scorers.
 */
export function buildAssistantEvalOutput(
  finishReason: AssistantEvalOutput['finishReason'],
  steps: Step[]
): AssistantEvalOutput {
  const simplifiedSteps = steps.map((step) => ({
    text: step.text,
    toolCalls: step.toolCalls.map((call) => ({
      toolName: call.toolName,
      input: call.input,
    })),
  }))

  const toolNames: string[] = []
  const sqlQueries: string[] = []
  const docs: string[] = []

  for (const step of steps) {
    // Pair results with calls by id rather than by array position: if any call
    // in the step has no result, positional pairing would hand every later call
    // the wrong result.
    const resultsByCallId = new Map(
      step.toolResults.map((result) => [result.toolCallId, result] as const)
    )

    for (const toolCall of step.toolCalls) {
      toolNames.push(toolCall.toolName)

      const toolResult = resultsByCallId.get(toolCall.toolCallId)
      if (!toolResult) continue

      const parsed = parseToolCall(toolCall, toolResult)
      if (parsed.sqlQuery) sqlQueries.push(parsed.sqlQuery)
      if (parsed.docs) docs.push(...parsed.docs)
    }
  }

  return { finishReason, steps: simplifiedSteps, toolNames, sqlQueries, docs }
}
|
||||
7
apps/studio/evals/scorer-online-manifest.json
Normal file
7
apps/studio/evals/scorer-online-manifest.json
Normal file
@@ -0,0 +1,7 @@
|
||||
[
|
||||
{ "slug": "goal-completion", "name": "Goal Completion" },
|
||||
{ "slug": "conciseness", "name": "Conciseness" },
|
||||
{ "slug": "completeness", "name": "Completeness" },
|
||||
{ "slug": "docs-faithfulness", "name": "Docs Faithfulness" },
|
||||
{ "slug": "url-validity", "name": "URL Validity" }
|
||||
]
|
||||
57
apps/studio/evals/scorer-online.ts
Normal file
57
apps/studio/evals/scorer-online.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* Entry point for `braintrust push` to deploy scorers to Braintrust.
|
||||
*
|
||||
* Excluded scorers:
|
||||
* - sqlSyntaxScorer, sqlIdentifierQuotingScorer: use libpg-query (WASM),
|
||||
* which esbuild cannot bundle for Braintrust's remote infra.
|
||||
* - toolUsageScorer: requires expected.requiredTools, offline-eval-only.
|
||||
* - correctnessScorer: requires ground truth (expected output), offline-eval-only.
|
||||
*/
|
||||
|
||||
import braintrust, { type EvalScorer } from 'braintrust'
|
||||
|
||||
import {
|
||||
completenessScorer,
|
||||
concisenessScorer,
|
||||
docsFaithfulnessScorer,
|
||||
goalCompletionScorer,
|
||||
urlValidityScorer,
|
||||
type AssistantEvalInput,
|
||||
type AssistantEvalOutput,
|
||||
type Expected,
|
||||
} from './scorer'
|
||||
import manifest from './scorer-online-manifest.json'
|
||||
|
||||
const projectId = process.env.BRAINTRUST_PROJECT_ID
// The project ID is only mandatory when IS_BRAINTRUST_PUSH is set; other loads
// of this module may proceed without it.
if (!projectId && process.env.IS_BRAINTRUST_PUSH) {
  throw new Error('BRAINTRUST_PROJECT_ID is not set')
}

// When running in CI, prefix scorers with the branch name to avoid collisions between PRs
// in the staging project. GITHUB_HEAD_REF is set on PR events, GITHUB_REF_NAME on push/dispatch.
const branch = process.env.GITHUB_HEAD_REF || process.env.GITHUB_REF_NAME
// Slug-safe prefix: lowercase, with every character outside [a-z0-9-] replaced by "-".
const prefix = branch ? `${branch.replace(/[^a-z0-9-]/gi, '-').toLowerCase()}-` : ''
const prNumber = process.env.GITHUB_PR_NUMBER ? Number(process.env.GITHUB_PR_NUMBER) : undefined
const metadata = branch ? { gitBranch: branch, ...(prNumber && { prNumber }) } : undefined
const description = prNumber && branch ? `#${prNumber} · ${branch}` : branch

// Maps each manifest slug to the scorer implementation deployed under it.
const handlers = {
  'goal-completion': goalCompletionScorer,
  conciseness: concisenessScorer,
  completeness: completenessScorer,
  'docs-faithfulness': docsFaithfulnessScorer,
  'url-validity': urlValidityScorer,
} satisfies Record<string, EvalScorer<AssistantEvalInput, AssistantEvalOutput, Expected>>

// @ts-expect-error - Project ID is only required at build-time
const project = braintrust.projects.create({ id: projectId })

for (const { slug, name } of manifest) {
  // The manifest is also read by the CI workflows, so it can drift from
  // `handlers`. Fail loudly instead of registering a scorer with no handler.
  if (!Object.hasOwn(handlers, slug)) {
    throw new Error(`No scorer handler registered for manifest slug "${slug}"`)
  }

  project.scorers.create({
    slug: `${prefix}${slug}`,
    name,
    description,
    handler: handlers[slug as keyof typeof handlers],
    ifExists: 'replace',
    metadata,
  })
}
|
||||
80
apps/studio/evals/scorer-wasm.ts
Normal file
80
apps/studio/evals/scorer-wasm.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import { EvalScorer } from 'braintrust'
|
||||
import { extractIdentifiers, isQuotedInSql, needsQuoting } from 'lib/sql-identifier-quoting'
|
||||
import { parse } from 'libpg-query'
|
||||
|
||||
import { AssistantEvalInput, AssistantEvalOutput, Expected } from './scorer'
|
||||
|
||||
/**
 * Scores the fraction of generated SQL queries that `libpg-query` can parse.
 *
 * Returns `null` (no score) when the output has no SQL queries. Otherwise the
 * score is `validQueries / totalQueries`, and the parser message for each
 * failing query is reported under `metadata.errors`.
 */
export const sqlSyntaxScorer: EvalScorer<
  AssistantEvalInput,
  AssistantEvalOutput,
  Expected
> = async ({ output }) => {
  const queries = output.sqlQueries
  if (queries === undefined || queries.length === 0) return null

  const errors: string[] = []
  let validQueries = 0

  for (const sql of queries) {
    try {
      // Only success or failure of the parse matters; the AST is discarded.
      await parse(sql)
      validQueries += 1
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      errors.push(`SQL syntax error: ${reason}`)
    }
  }

  const hasErrors = errors.length > 0
  return {
    name: 'SQL Validity',
    score: validQueries / queries.length,
    metadata: hasErrors ? { errors } : undefined,
  }
}
|
||||
|
||||
/**
 * Scores how many identifiers that need quoting are actually quoted in the
 * generated SQL.
 *
 * Returns `null` (no score) when the output has no SQL queries. Queries that
 * fail to parse contribute nothing. The score is `properlyQuoted /
 * totalNeedingQuotes`, or `1` when no identifier needed quoting; each unquoted
 * identifier is reported under `metadata.errors` with a preview of its query.
 */
export const sqlIdentifierQuotingScorer: EvalScorer<
  AssistantEvalInput,
  AssistantEvalOutput,
  Expected
> = async ({ output }) => {
  const queries = output.sqlQueries
  if (!queries?.length) return null

  const errors: string[] = []
  let totalNeedingQuotes = 0
  let properlyQuoted = 0

  for (const sql of queries) {
    try {
      const ast = await parse(sql)

      for (const identifier of extractIdentifiers(ast)) {
        if (!needsQuoting(identifier)) continue

        totalNeedingQuotes += 1
        if (isQuotedInSql(sql, identifier)) {
          properlyQuoted += 1
          continue
        }

        // Truncate long queries so the error metadata stays readable.
        const sqlPreview = sql.length > 100 ? `${sql.substring(0, 100)}...` : sql
        errors.push(`Identifier "${identifier}" needs quoting but is not quoted in: ${sqlPreview}`)
      }
    } catch {
      // Skip invalid SQL - already handled by sqlSyntaxScorer
    }
  }

  return {
    name: 'SQL Identifier Quoting',
    score: totalNeedingQuotes === 0 ? 1 : properlyQuoted / totalNeedingQuotes,
    metadata: errors.length > 0 ? { errors } : undefined,
  }
}
|
||||
@@ -3,13 +3,10 @@ import { LLMClassifierFromTemplate } from 'autoevals'
|
||||
import { EvalCase, EvalScorer } from 'braintrust'
|
||||
import { stripIndent } from 'common-tags'
|
||||
import { extractUrls } from 'lib/helpers'
|
||||
import { extractIdentifiers } from 'lib/sql-identifier-quoting'
|
||||
import { isQuotedInSql, needsQuoting } from 'lib/sql-identifier-quoting'
|
||||
import { parse } from 'libpg-query'
|
||||
|
||||
const LLM_AS_A_JUDGE_MODEL = 'gpt-5.2-2025-12-11'
|
||||
const LLM_AS_A_JUDGE_MODEL = 'gpt-5.2' // NOTE: `gpt-5.2-2025-12-11` snapshot not yet working with online scorers
|
||||
|
||||
type Input = {
|
||||
export type AssistantEvalInput = {
|
||||
prompt: string
|
||||
mockTables?: Record<
|
||||
string,
|
||||
@@ -21,7 +18,7 @@ type Input = {
|
||||
>
|
||||
}
|
||||
|
||||
type Output = {
|
||||
export type AssistantEvalOutput = {
|
||||
finishReason: FinishReason
|
||||
steps: Array<{ text: string; toolCalls: Array<{ toolName: string; input: unknown }> }>
|
||||
toolNames: string[]
|
||||
@@ -50,12 +47,12 @@ export type AssistantEvalCaseMetadata = {
|
||||
description?: string
|
||||
}
|
||||
|
||||
export type AssistantEvalCase = EvalCase<Input, Expected, AssistantEvalCaseMetadata>
|
||||
export type AssistantEvalCase = EvalCase<AssistantEvalInput, Expected, AssistantEvalCaseMetadata>
|
||||
|
||||
/**
|
||||
* Serialize steps into a string representation including text and tool calls
|
||||
*/
|
||||
function serializeSteps(steps: Output['steps']): string {
|
||||
function serializeSteps(steps: AssistantEvalOutput['steps']): string {
|
||||
return steps
|
||||
.map((step) => {
|
||||
const toolCalls = step.toolCalls
|
||||
@@ -69,17 +66,18 @@ function serializeSteps(steps: Output['steps']): string {
|
||||
/**
|
||||
* Extract only the text content from steps, filtering out empty text
|
||||
*/
|
||||
function extractTextOnly(steps: Output['steps']): string {
|
||||
function extractTextOnly(steps: AssistantEvalOutput['steps']): string {
|
||||
return steps
|
||||
.map((step) => step.text)
|
||||
.filter((text) => text && text.trim().length > 0)
|
||||
.join('\n')
|
||||
}
|
||||
|
||||
export const toolUsageScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
output,
|
||||
expected,
|
||||
}) => {
|
||||
export const toolUsageScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ output, expected }) => {
|
||||
if (!expected.requiredTools) return null
|
||||
|
||||
const presentCount = expected.requiredTools.filter((tool) =>
|
||||
@@ -94,31 +92,6 @@ export const toolUsageScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
}
|
||||
}
|
||||
|
||||
export const sqlSyntaxScorer: EvalScorer<Input, Output, Expected> = async ({ output }) => {
|
||||
if (output.sqlQueries === undefined || output.sqlQueries.length === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
const errors: string[] = []
|
||||
let validQueries = 0
|
||||
|
||||
for (const sql of output.sqlQueries) {
|
||||
try {
|
||||
await parse(sql)
|
||||
validQueries++
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
errors.push(`SQL syntax error: ${errorMessage}`)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
name: 'SQL Validity',
|
||||
score: validQueries / output.sqlQueries.length,
|
||||
metadata: errors.length > 0 ? { errors } : undefined,
|
||||
}
|
||||
}
|
||||
|
||||
const concisenessEvaluator = LLMClassifierFromTemplate<{ input: string }>({
|
||||
name: 'Conciseness',
|
||||
promptTemplate: stripIndent`
|
||||
@@ -137,7 +110,11 @@ const concisenessEvaluator = LLMClassifierFromTemplate<{ input: string }>({
|
||||
model: LLM_AS_A_JUDGE_MODEL,
|
||||
})
|
||||
|
||||
export const concisenessScorer: EvalScorer<Input, Output, Expected> = async ({ input, output }) => {
|
||||
export const concisenessScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ input, output }) => {
|
||||
return await concisenessEvaluator({
|
||||
input: input.prompt,
|
||||
output: serializeSteps(output.steps),
|
||||
@@ -161,10 +138,11 @@ const completenessEvaluator = LLMClassifierFromTemplate<{ input: string }>({
|
||||
model: LLM_AS_A_JUDGE_MODEL,
|
||||
})
|
||||
|
||||
export const completenessScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
input,
|
||||
output,
|
||||
}) => {
|
||||
export const completenessScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ input, output }) => {
|
||||
return await completenessEvaluator({
|
||||
input: input.prompt,
|
||||
output: serializeSteps(output.steps),
|
||||
@@ -189,10 +167,11 @@ const goalCompletionEvaluator = LLMClassifierFromTemplate<{ input: string }>({
|
||||
model: LLM_AS_A_JUDGE_MODEL,
|
||||
})
|
||||
|
||||
export const goalCompletionScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
input,
|
||||
output,
|
||||
}) => {
|
||||
export const goalCompletionScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ input, output }) => {
|
||||
return await goalCompletionEvaluator({
|
||||
input: input.prompt,
|
||||
output: serializeSteps(output.steps),
|
||||
@@ -220,7 +199,11 @@ const docsFaithfulnessEvaluator = LLMClassifierFromTemplate<{ docs: string }>({
|
||||
model: LLM_AS_A_JUDGE_MODEL,
|
||||
})
|
||||
|
||||
export const docsFaithfulnessScorer: EvalScorer<Input, Output, Expected> = async ({ output }) => {
|
||||
export const docsFaithfulnessScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ output }) => {
|
||||
// Skip scoring if no docs were retrieved
|
||||
if (!output.docs || output.docs.length === 0) {
|
||||
return null
|
||||
@@ -261,11 +244,11 @@ const correctnessEvaluator = LLMClassifierFromTemplate<{ input: string; expected
|
||||
model: LLM_AS_A_JUDGE_MODEL,
|
||||
})
|
||||
|
||||
export const correctnessScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
input,
|
||||
output,
|
||||
expected,
|
||||
}) => {
|
||||
export const correctnessScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ input, output, expected }) => {
|
||||
// Skip scoring if no ground truth is provided
|
||||
if (!expected.correctAnswer) {
|
||||
return null
|
||||
@@ -278,77 +261,44 @@ export const correctnessScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
})
|
||||
}
|
||||
|
||||
export const sqlIdentifierQuotingScorer: EvalScorer<Input, Output, Expected> = async ({
|
||||
input,
|
||||
output,
|
||||
}) => {
|
||||
// Skip if no SQL queries
|
||||
if (!output.sqlQueries?.length) {
|
||||
return null
|
||||
}
|
||||
|
||||
const errors: string[] = []
|
||||
let totalNeedingQuotes = 0
|
||||
let properlyQuoted = 0
|
||||
|
||||
for (const sql of output.sqlQueries) {
|
||||
try {
|
||||
const ast = await parse(sql)
|
||||
const identifiers = extractIdentifiers(ast)
|
||||
|
||||
for (const identifier of identifiers) {
|
||||
if (needsQuoting(identifier)) {
|
||||
totalNeedingQuotes++
|
||||
if (isQuotedInSql(sql, identifier)) {
|
||||
properlyQuoted++
|
||||
} else {
|
||||
const sqlPreview = sql.length > 100 ? `${sql.substring(0, 100)}...` : sql
|
||||
errors.push(
|
||||
`Identifier "${identifier}" needs quoting but is not quoted in: ${sqlPreview}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Skip invalid SQL - already handled by sqlSyntaxScorer
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
const score = totalNeedingQuotes === 0 ? 1 : properlyQuoted / totalNeedingQuotes
|
||||
|
||||
return {
|
||||
name: 'SQL Identifier Quoting',
|
||||
score,
|
||||
metadata: errors.length > 0 ? { errors } : undefined,
|
||||
}
|
||||
}
|
||||
|
||||
export const urlValidityScorer: EvalScorer<Input, Output, Expected> = async ({ output }) => {
|
||||
export const urlValidityScorer: EvalScorer<
|
||||
AssistantEvalInput,
|
||||
AssistantEvalOutput,
|
||||
Expected
|
||||
> = async ({ output }) => {
|
||||
const responseText = extractTextOnly(output.steps)
|
||||
const urls = extractUrls(responseText, { excludeCodeBlocks: true, excludeTemplates: true })
|
||||
const allUrls = extractUrls(responseText, { excludeCodeBlocks: true, excludeTemplates: true })
|
||||
const urls = allUrls.filter((url) => {
|
||||
try {
|
||||
const { hostname } = new URL(url)
|
||||
return hostname === 'supabase.com' || hostname.endsWith('.supabase.com')
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
})
|
||||
|
||||
// Skip if no URLs found
|
||||
if (urls.length === 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
const errors: string[] = []
|
||||
let validUrls = 0
|
||||
|
||||
for (const url of urls) {
|
||||
try {
|
||||
const response = await fetch(url, { method: 'HEAD', signal: AbortSignal.timeout(5000) })
|
||||
if (response.ok) {
|
||||
validUrls++
|
||||
} else {
|
||||
errors.push(`${url} returned ${response.status}`)
|
||||
const results = await Promise.all(
|
||||
urls.map(async (url) => {
|
||||
try {
|
||||
const response = await fetch(url, { method: 'HEAD', signal: AbortSignal.timeout(5000) })
|
||||
if (response.ok) {
|
||||
return { valid: true }
|
||||
}
|
||||
return { valid: false, error: `${url} returned ${response.status}` }
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
return { valid: false, error: `${url} failed: ${errorMessage}` }
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
errors.push(`${url} failed: ${errorMessage}`)
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
const errors = results.flatMap((r) => (r.error ? [r.error] : []))
|
||||
const validUrls = results.filter((r) => r.valid).length
|
||||
|
||||
const metadata = {
|
||||
urls,
|
||||
|
||||
@@ -8,8 +8,10 @@ import {
|
||||
type ToolSet,
|
||||
type UIMessage,
|
||||
} from 'ai'
|
||||
import { traced, wrapAISDK, type Span } from 'braintrust'
|
||||
import { startSpan, traced, withCurrent, wrapAISDK, type Span } from 'braintrust'
|
||||
import { source } from 'common-tags'
|
||||
import { buildAssistantEvalOutput } from 'evals/output'
|
||||
import type { AssistantEvalInput, AssistantEvalOutput } from 'evals/scorer'
|
||||
import type { AiOptInLevel } from 'hooks/misc/useOrgOptedIntoAi'
|
||||
import { IS_TRACING_ENABLED } from 'lib/ai/braintrust-logger'
|
||||
import {
|
||||
@@ -66,10 +68,6 @@ export async function generateAssistantResponse({
|
||||
const shouldTrace = IS_TRACING_ENABLED && !isHipaaEnabled
|
||||
|
||||
const run = async (span?: Span) => {
|
||||
if (span) {
|
||||
onSpanCreated?.(span.id)
|
||||
}
|
||||
|
||||
// Only returns last 7 messages
|
||||
// Filters out tools with invalid states
|
||||
// Filters out tool outputs based on opt-in level using renderingToolOutputParser
|
||||
@@ -144,24 +142,37 @@ export async function generateAssistantResponse({
|
||||
|
||||
const streamTextFn = shouldTrace ? tracedStreamText : ai.streamText
|
||||
|
||||
const streamTextArgs = {
|
||||
return streamTextFn({
|
||||
model,
|
||||
stopWhen: stepCountIs(5),
|
||||
messages: coreMessages,
|
||||
...(providerOptions && { providerOptions }),
|
||||
tools,
|
||||
...(abortSignal && { abortSignal }),
|
||||
onFinish: ({ steps }) => {
|
||||
for (const step of steps) {
|
||||
for (const toolCall of step.toolCalls) {
|
||||
if (toolCall.toolName === 'rename_chat') {
|
||||
const { newName } = toolCall.input as { newName: string }
|
||||
span?.log({ metadata: { chatName: newName } })
|
||||
...(span && {
|
||||
onFinish: ({ steps, finishReason }) => {
|
||||
for (const step of steps) {
|
||||
for (const toolCall of step.toolCalls) {
|
||||
if (toolCall.toolName === 'rename_chat') {
|
||||
const { newName } = toolCall.input as { newName: string }
|
||||
span.log({ metadata: { chatName: newName } })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
} satisfies Parameters<typeof ai.streamText>[0]
|
||||
span.log({
|
||||
output: buildAssistantEvalOutput(finishReason, steps) satisfies AssistantEvalOutput,
|
||||
})
|
||||
span.end()
|
||||
},
|
||||
}),
|
||||
} satisfies Parameters<typeof ai.streamText>[0])
|
||||
}
|
||||
|
||||
if (shouldTrace) {
|
||||
// startSpan instead of traced() so we control when the span closes — onFinish logs
|
||||
// output to the span before we call span.end(), ensuring online scoring sees the output.
|
||||
const span = startSpan({ name: 'generateAssistantResponse', type: 'function' })
|
||||
onSpanCreated?.(span.id)
|
||||
|
||||
const lastUserMessage = rawMessages.findLast((m) => m.role === 'user')
|
||||
const lastUserText = lastUserMessage?.parts
|
||||
@@ -169,8 +180,8 @@ export async function generateAssistantResponse({
|
||||
.map((p) => p.text)
|
||||
.join('\n')
|
||||
|
||||
span?.log({
|
||||
input: lastUserText,
|
||||
span.log({
|
||||
input: { prompt: lastUserText ?? '' } satisfies AssistantEvalInput,
|
||||
metadata: {
|
||||
projectRef,
|
||||
chatId,
|
||||
@@ -185,11 +196,7 @@ export async function generateAssistantResponse({
|
||||
},
|
||||
})
|
||||
|
||||
return streamTextFn(streamTextArgs)
|
||||
}
|
||||
|
||||
if (shouldTrace) {
|
||||
return traced(run, { type: 'function', name: 'generateAssistantResponse' })
|
||||
return withCurrent(span, () => run(span))
|
||||
}
|
||||
|
||||
return run()
|
||||
|
||||
@@ -24,7 +24,8 @@
|
||||
"build:graphql-types:watch": "pnpm graphql-codegen --config scripts/codegen.ts --watch",
|
||||
"evals:setup": "cp node_modules/libpg-query/wasm/libpg-query.wasm evals/libpg-query.wasm",
|
||||
"evals:run": "braintrust eval --no-send-logs evals/assistant.eval.ts",
|
||||
"evals:upload": "braintrust eval evals/assistant.eval.ts"
|
||||
"evals:upload": "braintrust eval evals/assistant.eval.ts",
|
||||
"scorers:deploy": "IS_BRAINTRUST_PUSH=true braintrust push evals/scorer-online.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/amazon-bedrock": "^3.0.0",
|
||||
@@ -183,7 +184,7 @@
|
||||
"@vitest/coverage-v8": "^3.2.0",
|
||||
"@vitest/ui": "^3.2.0",
|
||||
"api-types": "workspace:*",
|
||||
"autoevals": "^0.0.131",
|
||||
"autoevals": "^0.0.132",
|
||||
"braintrust": "^3.0.0",
|
||||
"common": "workspace:*",
|
||||
"config": "workspace:*",
|
||||
|
||||
30
pnpm-lock.yaml
generated
30
pnpm-lock.yaml
generated
@@ -1213,8 +1213,8 @@ importers:
|
||||
specifier: workspace:*
|
||||
version: link:../../packages/api-types
|
||||
autoevals:
|
||||
specifier: ^0.0.131
|
||||
version: 0.0.131(encoding@0.1.13)(ws@8.18.3)
|
||||
specifier: ^0.0.132
|
||||
version: 0.0.132(ws@8.18.3)
|
||||
braintrust:
|
||||
specifier: ^3.0.0
|
||||
version: 3.0.0(@aws-sdk/credential-provider-web-identity@3.830.0)(supports-color@8.1.1)(zod@3.25.76)
|
||||
@@ -9712,8 +9712,8 @@ packages:
|
||||
resolution: {integrity: sha512-Hdw8qdNiqdJ8LqT0iK0sVzkFbzg6fhnQqqfWhBDxcHZvU75+B+ayzTy8x+k5Ix0Y92XOhOUlx74ps+bA6BeYMQ==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
autoevals@0.0.131:
|
||||
resolution: {integrity: sha512-F+3lraja+Ms7n1M2cpWl65N7AYx4sPocRW454H5HlSGabYMfuFOUxw8IXmEYDkQ38BxtZ0Wd5ZAQj9RF59YJWw==}
|
||||
autoevals@0.0.132:
|
||||
resolution: {integrity: sha512-x033hXLO1Vyggbv68Y1QeoZlrdKHcNexcutPhVaDhDJ4SO6TU1rG6vME77g/zKvH4VWFVBPDWM1pRbPxAFF+sA==}
|
||||
|
||||
autolinker@0.28.1:
|
||||
resolution: {integrity: sha512-zQAFO1Dlsn69eXaO6+7YZc+v84aquQKbwpzCE3L0stj56ERn9hutFxPopViLjo9G+rWwjozRhgS5KJ25Xy19cQ==}
|
||||
@@ -14932,6 +14932,18 @@ packages:
|
||||
zod:
|
||||
optional: true
|
||||
|
||||
openai@6.22.0:
|
||||
resolution: {integrity: sha512-7Yvy17F33Bi9RutWbsaYt5hJEEJ/krRPOrwan+f9aCPuMat1WVsb2VNSII5W1EksKT6fF69TG/xj4XzodK3JZw==}
|
||||
hasBin: true
|
||||
peerDependencies:
|
||||
ws: ^8.18.0
|
||||
zod: ^3.25 || ^4.0
|
||||
peerDependenciesMeta:
|
||||
ws:
|
||||
optional: true
|
||||
zod:
|
||||
optional: true
|
||||
|
||||
openapi-fetch@0.12.4:
|
||||
resolution: {integrity: sha512-Hb7WNjZNir5P4RjacbYU4tXAiVay1Hj83BHkBE5Psn3A58Xfkp+vx0Ky76O4ZTw2VzaH3cTk1qbf4vQ6H/zmPw==}
|
||||
|
||||
@@ -27348,7 +27360,7 @@ snapshots:
|
||||
|
||||
auto-bind@4.0.0: {}
|
||||
|
||||
autoevals@0.0.131(encoding@0.1.13)(ws@8.18.3):
|
||||
autoevals@0.0.132(ws@8.18.3):
|
||||
dependencies:
|
||||
ajv: 8.18.0
|
||||
compute-cosine-similarity: 1.1.0
|
||||
@@ -27356,11 +27368,10 @@ snapshots:
|
||||
js-yaml: 4.1.1
|
||||
linear-sum-assignment: 1.0.9
|
||||
mustache: 4.2.0
|
||||
openai: 4.104.0(encoding@0.1.13)(ws@8.18.3)(zod@3.25.76)
|
||||
openai: 6.22.0(ws@8.18.3)(zod@3.25.76)
|
||||
zod: 3.25.76
|
||||
zod-to-json-schema: 3.25.0(zod@3.25.76)
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- ws
|
||||
|
||||
autolinker@0.28.1:
|
||||
@@ -33862,6 +33873,11 @@ snapshots:
|
||||
ws: 8.18.3
|
||||
zod: 3.25.76
|
||||
|
||||
openai@6.22.0(ws@8.18.3)(zod@3.25.76):
|
||||
optionalDependencies:
|
||||
ws: 8.18.3
|
||||
zod: 3.25.76
|
||||
|
||||
openapi-fetch@0.12.4:
|
||||
dependencies:
|
||||
openapi-typescript-helpers: 0.0.14
|
||||
|
||||
@@ -103,6 +103,10 @@
|
||||
"ASSET_CDN_S3_ENDPOINT",
|
||||
"SITE_NAME",
|
||||
"VERCEL_URL",
|
||||
"IS_BRAINTRUST_PUSH",
|
||||
"GITHUB_HEAD_REF",
|
||||
"GITHUB_REF_NAME",
|
||||
"GITHUB_PR_NUMBER",
|
||||
"IS_THROTTLED",
|
||||
"AI_PRO_MODEL",
|
||||
"AI_NORMAL_MODEL",
|
||||
|
||||
Reference in New Issue
Block a user