mirror of
https://github.com/supabase/supabase.git
synced 2026-07-02 16:44:20 +08:00
strictNullChecks was off for docs, which lets errors slip through and leads to incorrect required/optional typing on Zod-inferred types. This PR enables strictNullChecks and fixes all the existing violations.
251 lines
6.7 KiB
TypeScript
251 lines
6.7 KiB
TypeScript
import '../utils/dotenv.js'
|
|
|
|
import { createClient } from '@supabase/supabase-js'
|
|
import { parseArgs } from 'node:util'
|
|
import { OpenAI } from 'openai'
|
|
import { v4 as uuidv4 } from 'uuid'
|
|
import type { Json, Section } from '../helpers.mdx.js'
|
|
import { fetchAllSources } from './sources/index.js'
|
|
|
|
// Command-line flags for this script. `--refresh` is the only option; it is
// read by generateEmbeddings() to decide whether every page is re-generated
// regardless of its stored checksum.
const args = parseArgs({ options: { refresh: { type: 'boolean' } } })
|
|
|
|
/**
 * Rebuilds the stored pages and their embedded sections.
 *
 * For every source returned by `fetchAllSources()` this writes a `page` row
 * and one `page_section` row per section, each section carrying an embedding
 * from OpenAI's `text-embedding-ada-002` model.
 *
 * A page's sections are re-generated when the `--refresh` flag is set or when
 * the checksum stored in the database differs from the freshly computed one;
 * otherwise only the page's metadata columns are updated. Every page written
 * during this run is stamped with a run-specific version UUID, and once all
 * sources are processed, pages whose version differs from it are deleted.
 *
 * Failures while handling a single page are logged and do not abort the run.
 *
 * @throws {Error} If any of the required environment variables is unset.
 * @throws If `fetchAllSources()` rejects or the final delete of outdated
 *   pages returns an error.
 */
async function generateEmbeddings() {
  const shouldRefresh = Boolean(args.values.refresh)

  const requiredEnvVars = [
    'DOCS_GITHUB_APP_ID',
    'DOCS_GITHUB_APP_INSTALLATION_ID',
    'DOCS_GITHUB_APP_PRIVATE_KEY',
    'NEXT_PUBLIC_MISC_ANON_KEY',
    'NEXT_PUBLIC_MISC_URL',
    'NEXT_PUBLIC_SUPABASE_URL',
    'OPENAI_API_KEY',
    'SUPABASE_SECRET_KEY',
  ]

  // Fail fast, naming every missing variable at once.
  const missingEnvVars = requiredEnvVars.filter((name) => !process.env[name])
  if (missingEnvVars.length > 0) {
    throw new Error(
      `Environment variables ${missingEnvVars.join(', ')} are required: skipping embeddings generation`
    )
  }

  // Session persistence and token auto-refresh are switched off for this client.
  const supabaseClient = createClient(
    process.env.NEXT_PUBLIC_SUPABASE_URL!,
    process.env.SUPABASE_SECRET_KEY!,
    {
      auth: {
        persistSession: false,
        autoRefreshToken: false,
      },
    }
  )

  // A single OpenAI client is shared by every embedding request in this run
  // instead of constructing a new one per page section. OPENAI_API_KEY has
  // already been validated above.
  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

  // Use this version to track which pages to purge
  // after the refresh
  const refreshVersion = uuidv4()

  const refreshDate = new Date()

  const embeddingSources = await fetchAllSources()

  console.log(`Discovered ${embeddingSources.length} pages`)

  if (!shouldRefresh) {
    console.log('Checking which pages are new or have changed')
  } else {
    console.log('Refresh flag set, re-generating all pages')
  }

  for (const embeddingSource of embeddingSources) {
    const { type, source, path } = embeddingSource

    try {
      const {
        checksum,
        sections,
        meta = {},
        ragIgnore = false,
      }: {
        checksum: string
        sections: Section[]
        ragIgnore?: boolean
        meta?: Json
      } = embeddingSource.process()

      // Check for existing page in DB and compare checksums
      const { error: fetchPageError, data: existingPage } = await supabaseClient
        .from('page')
        .select('id, path, checksum')
        .filter('path', 'eq', path)
        .limit(1)
        .maybeSingle()

      if (fetchPageError) {
        throw fetchPageError
      }

      // We use checksum to determine if this page & its sections need to be regenerated
      if (!shouldRefresh && existingPage?.checksum === checksum) {
        // No content/embedding update required on this page.
        // Update other meta info; stamping the current version also keeps the
        // page from being purged at the end of the run.
        const { error: updatePageError } = await supabaseClient
          .from('page')
          .update({
            type,
            source,
            meta,
            version: refreshVersion,
            last_refresh: refreshDate,
          })
          .filter('id', 'eq', existingPage.id)

        if (updatePageError) {
          throw updatePageError
        }

        continue
      }

      if (existingPage) {
        if (!shouldRefresh) {
          console.log(
            `[${path}] Docs have changed, removing old page sections and their embeddings`
          )
        } else {
          console.log(`[${path}] Refresh flag set, removing old page sections and their embeddings`)
        }

        const { error: deletePageSectionError } = await supabaseClient
          .from('page_section')
          .delete()
          .filter('page_id', 'eq', existingPage.id)

        if (deletePageSectionError) {
          throw deletePageSectionError
        }
      }

      // Create/update page record. Intentionally clear checksum until we
      // have successfully generated all page sections.
      const { error: upsertPageError, data: page } = await supabaseClient
        .from('page')
        .upsert(
          {
            checksum: null,
            path,
            type,
            source,
            meta,
            content: embeddingSource.extractIndexedContent(),
            version: refreshVersion,
            last_refresh: refreshDate,
          },
          { onConflict: 'path' }
        )
        .select()
        .limit(1)
        .single()

      if (upsertPageError) {
        throw upsertPageError
      }

      console.log(`[${path}] Adding ${sections.length} page sections (with embeddings)`)
      for (const { slug, heading, content } of sections) {
        // OpenAI recommends replacing newlines with spaces for best results (specific to embeddings)
        const input = content.replace(/\n/g, ' ')

        try {
          const embeddingResponse = await openai.embeddings.create({
            model: 'text-embedding-ada-002',
            input,
          })

          const [responseData] = embeddingResponse.data

          // The original (un-flattened) content is stored; only the embedding
          // input had its newlines replaced.
          const { error: insertPageSectionError } = await supabaseClient
            .from('page_section')
            .insert({
              page_id: page.id,
              slug,
              heading,
              content,
              token_count: embeddingResponse.usage.total_tokens,
              embedding: responseData.embedding,
              rag_ignore: ragIgnore,
            })
            .select()
            .limit(1)
            .single()

          if (insertPageSectionError) {
            throw insertPageSectionError
          }
        } catch (err) {
          // TODO: decide how to better handle failed embeddings
          console.error(
            `Failed to generate embeddings for '${path}' page section starting with '${input.slice(0, 40)}...'`
          )

          // Re-throw so the page-level handler below reports the page and its
          // checksum stays null.
          throw err
        }
      }

      // Set page checksum so that we know this page was stored successfully
      const { error: updatePageError } = await supabaseClient
        .from('page')
        .update({ checksum })
        .filter('id', 'eq', page.id)

      if (updatePageError) {
        throw updatePageError
      }
    } catch (err) {
      // Best effort: report the failure and move on to the next page.
      console.error(
        `Page '${path}' or one/multiple of its page sections failed to store properly. Page has been marked with null checksum to indicate that it needs to be re-generated.`
      )
      console.error(err)
    }
  }

  console.log(`Removing old pages and their sections`)

  // Delete pages that have been removed (and their sections via cascade)
  // NOTE(review): a page whose processing failed before its row was re-stamped
  // with refreshVersion (e.g. process() threw or the lookup query errored)
  // still carries an older version and is removed here as well — confirm that
  // this is intended.
  const { error: deletePageError } = await supabaseClient
    .from('page')
    .delete()
    .filter('version', 'neq', refreshVersion)

  if (deletePageError) {
    throw deletePageError
  }

  console.log('Embedding generation complete')
}
|
|
|
|
/**
 * Script entry point: runs the embedding generation and resolves once it has
 * finished. Any rejection from generateEmbeddings() propagates to the caller.
 */
async function main() {
  await generateEmbeddings()
}
|
|
|
|
// Kick off the script. A rejection is printed to stderr and the process is
// terminated with exit status 1 so callers can detect the failure.
main().catch((fatalError) => {
  console.error(fatalError)
  process.exit(1)
})
|