Files
supabase/package.json
Matt Rossman 072883bcec feat: assistant evals (#41311)
* chore: bump `supabase` CLI

* chore: stricter message types in `generate-v4.ts`

* feat: tutorial eval

https://www.braintrust.dev/docs/evaluation

* feat: project ID for eval

* refactor: `generateAssistantResponse` out of `handlePost`

* refactor: generateAssistantResponse to lib/ai

* feat: factuality eval with assistant response

* chore: upgrade braintrust to v1.0.1

* chore: silence tsconfig warning

* feat: assertion scorer

* fix: aggregate tools across all steps

* refactor: strict tool names, remove need for `as const`

* refactor: generic tool name type in assertions

* feat: transfer mocks from `feature/braintrust`

* feat: LLM criteria assertion

* feat: braintrust evals workflow

* fix: BRAINTRUST_PROJECT_ID

* feat: `sql_similar` assertion

* fix: `OPENAI_API_KEY` in workflow env

* feat: split AssertionScorer into separate scorers

* feat: remove tutorial eval

* feat: 20 minute CI timeout

* feat: category in test case metadata

* feat: score with gpt-5

* refactor: dataset to own file, colocate scorers

* feat: "gpt-5.2-2025-12-11" for llm as a judge

* feat: SQL syntax scorer with `libpg-query`

* feat: `evals:setup` and `evals:run` scripts

* feat: `evals:setup` in CI

* feat: human readable scorer names

* chore: rename to "SQL Validity"

* feat: add 2 "sql_generation" test cases

* feat: update requiredTools in test cases

* chore: ignore Cursor MCP config

* feat: "Conciseness" score

* feat: "Completeness" scorer

* fix: generate-v4 test mocks

* feat: serialize "steps" for scorer inputs

* update Node memory options for typecheck

* updated runner

* remove RAM update, as Actions handles this

* feat: read `BRAINTRUST_PROJECT_ID` from secrets

* feat: score helpfulness, remove old scorers

* feat: separate `evals:run` and `evals:upload` scripts

* feat: passthrough entire classifier result

* feat: use live `search_docs` impl, store docs result in metadata

* feat: reduce classifier options

* feat: filter workflow by `run-evals` PR label or `master` branch

* chore: cleanup stubbed mock tools

* fix: checkout actual branch with `ref:`

* fix: capture search_docs results from all content parts

* feat: simplify sql syntax score calculation

* feat: use AI SDK's UI message validator

* docs: justification for relative `extends`

* fix: cleanup leftover validatedMessages

* docs: note for Snyk that the mock token isn't a secret

* fix: mock ui message to pass validation

* feat: revert ignoring Cursor MCP config

Using `.git/info/exclude` instead until we have an opinion on this

* feat: add "tsconfig" as shared-data devDependency, revert relative path in tsconfig

* refactor: tool call parsing into function

* Update apps/studio/evals/assistant.eval.ts

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* refactor: organize mock schemas and tool factories

---------

Co-authored-by: Ali Waseem <waseema393@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2025-12-22 23:45:48 -05:00

69 lines
3.3 KiB
JSON

{
"name": "supabase",
"description": "The Postgres Development Platform.",
"version": "0.0.0",
"author": "Supabase, Inc.",
"license": "Apache-2.0",
"private": true,
"scripts": {
"preinstall": "npx only-allow pnpm",
"build": "turbo run build",
"build:cms": "turbo run build --filter=cms --parallel",
"build:studio": "turbo run build --filter=studio",
"build:design-system": "turbo run build --filter=design-system",
"build:docs": "turbo run build --filter=docs",
"clean": "turbo run clean --parallel && rimraf -G node_modules/{*,.bin,.modules.yaml}",
"dev": "turbo run dev --parallel",
"dev:cms": "turbo run dev --filter=cms --parallel",
"dev:studio": "turbo run dev --filter=studio --parallel",
"dev:studio-local": "pnpm setup:cli && NODE_ENV=test pnpm --prefix ./apps/studio dev",
"dev:docs": "turbo run dev --filter=docs --parallel",
"dev:www": "turbo run dev --filter=www --parallel",
"dev:design-system": "turbo run dev --filter=design-system --parallel",
"lint": "turbo run lint",
"typecheck": "turbo --continue typecheck",
"test:prettier": "prettier --cache --check '{apps,packages}/**/*.{js,jsx,ts,tsx,css,md,mdx,json}'",
"format": "prettier --cache --write '{apps,packages}/**/*.{js,jsx,ts,tsx,css,md,mdx,json}'",
"test:docs": "turbo run test --filter=docs",
"test:ui": "turbo run test --filter=ui",
"test:ui-patterns": "turbo run test --filter=ui-patterns",
"test:studio": "turbo run test --filter=studio",
"test:studio:watch": "turbo run test --filter=studio -- watch",
"e2e:setup:cli": "supabase stop --all --no-backup ; supabase start --exclude studio && supabase db reset && supabase status --output json > keys.json && node scripts/generateLocalEnv.js",
"e2e:setup:selfhosted": "SKIP_ASSET_UPLOAD=1 pnpm e2e:setup:cli && NODE_ENV=test NODE_OPTIONS=\"--max-old-space-size=4096\" pnpm run build:studio && NODE_ENV=test pnpm --prefix ./apps/studio start",
"e2e:setup:platform": "SKIP_ASSET_UPLOAD=1 NODE_OPTIONS=\"--max-old-space-size=4096\" pnpm run build:studio && pnpm --prefix ./apps/studio start",
"e2e": "pnpm --prefix e2e/studio run e2e",
"e2e:ui": "pnpm --prefix e2e/studio run e2e:ui",
"perf:kong": "ab -t 5 -c 20 -T application/json http://localhost:8000/",
"perf:meta": "ab -t 5 -c 20 -T application/json http://localhost:5555/tables",
"setup:cli": "supabase start -x studio && supabase status --output json > keys.json && node scripts/generateLocalEnv.js",
"generate:types": "supabase gen types typescript --local > ./supabase/functions/common/database-types.ts",
"api:codegen": "cd packages/api-types && pnpm run codegen",
"knip": "pnpx knip@~5.50.0"
},
"devDependencies": {
"@aws-sdk/client-secrets-manager": "^3.823.0",
"@types/node": "catalog:",
"eslint": "^9.0.0",
"prettier": "3.2.4",
"prettier-plugin-sql-cst": "^0.11.0",
"rimraf": "^6.0.0",
"sass": "^1.72.0",
"supabase": "^2.65.6",
"supports-color": "^8.0.0",
"tailwindcss": "catalog:",
"turbo": "2.3.3",
"typescript": "catalog:"
},
"repository": {
"type": "git",
"url": "git+https://github.com/supabase/supabase.git"
},
"engines": {
"pnpm": "10.24",
"node": ">=22"
},
"keywords": ["postgres", "firebase", "storage", "functions", "database", "auth"],
"packageManager": "pnpm@10.24.0"
}