mirror of
https://github.com/supabase/supabase.git
synced 2026-06-20 03:27:24 +08:00
## Summary

Google Search Console flagged 4 "URL not allowed" errors on `sitemap_www.xml` — malformed URLs like `https://supabase.comdata/legal/terms/v1` (missing slash, non-existent path). The generator was globbing `data/**/*.mdx`, picking up the 4 content-source MDX files under `data/legal/` that are imported by `pages/terms.tsx` and `pages/enterprise-terms.tsx` but are not themselves routed. With no path replacement mapping `data/...` to a route and no leading slash, the URL template concatenated to garbage. The real `/terms` and `/enterprise-terms` URLs come from the `pages/*.tsx` glob and are unaffected.

## Changes

- Remove `data/**/*.mdx` glob (and its companion `!data/*.mdx` exclude) from the sitemap generator. `apps/www/data/` has no routed MDX, only content sources imported into pages.
- Anchor the `pages` prefix replace: `.replace('pages', '')` → `.replace(/^pages/, '')`. String-form replace is first-occurrence and would mangle any future filename containing `pages` as a non-prefix substring (e.g., `_blog/about-pages.mdx` → `/blog/about-`). No current files trigger this; defensive hardening.

## Testing

Regenerated the sitemap locally and verified:

- [x] `grep -c "supabase.comdata" public/sitemap_www.xml` → `0` (was 4)
- [x] `<loc>https://supabase.com/terms</loc>` and `<loc>https://supabase.com/enterprise-terms</loc>` still present
- [x] Every `<loc>` matches `^<loc>https://supabase\.com(/[a-zA-Z0-9].*)?</loc>$` (no malformed URLs of any kind)
- [x] Total loc count stable across both commits (regression-free for the anchor change)

Local count is lower than prod (527 vs 906) because `.next/server/pages/**` partner/expert/feature HTML globs only resolve after a full build — runs correctly via `postbuild` on Vercel.

After deploy lands, resubmit `sitemap_www.xml` in Google Search Console to force a re-crawl (otherwise daily-ish). Expect status to flip from "4 errors" to "Success" and Discovered pages: 906 → 902.

## Linear

- fixes GROWTH-837

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

## Release Notes

* **Chores**
  * Improved sitemap generation to properly index specific content sections (blog, case studies, customers, events, and alternatives) with refined route path processing for better search engine discoverability.

[](https://app.coderabbit.ai/change-stack/supabase/supabase/pull/45775)

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
176 lines
4.9 KiB
JavaScript
176 lines
4.9 KiB
JavaScript
import { readFileSync, writeFileSync } from 'fs'
|
|
import { globby } from 'globby'
|
|
import prettier from 'prettier'
|
|
|
|
/** Origin prepended to every route to build an absolute sitemap URL. */
const SITE_ORIGIN = 'https://supabase.com'

/** URL segment that blog posts are listed under. */
const BLOG_URL = 'blog'

/** URL segment that events are listed under. */
const EVENTS_URL = 'events'

/**
 * Number of leading filename characters dropped from blog and event slugs.
 * Equal to the length of a `YYYY-MM-DD-` prefix.
 */
const DATE_PREFIX_LENGTH = 11

/**
 * Glob patterns handed to globby to collect sitemap candidates.
 * Entries starting with `!` exclude matches (private `_*` pages, API routes, the 404 page).
 * The `.next/server/pages/**` entries match prerendered HTML, so they only
 * resolve after a full build.
 */
const PAGE_GLOBS = [
  'pages/*.js',
  'pages/*.tsx',
  'pages/*.mdx',
  'pages/**/*.tsx',
  '_blog/*.mdx',
  '_case-studies/*.mdx',
  '_customers/*.mdx',
  '_events/*.mdx',
  '_alternatives/*.mdx',
  '!pages/_*.js',
  '!pages/_*.tsx',
  '!pages/api',
  '!pages/404.tsx',
  '.next/server/pages/partners/integrations/*.html',
  '.next/server/pages/partners/experts/*.html',
  '.next/server/pages/features/*.html',
]

/** Content directories mapped to the URL prefix their files are published under. */
const CONTENT_DIR_ROUTES = {
  _blog: `/${BLOG_URL}`,
  '_case-studies': '/case-studies',
  _customers: '/customers',
  _events: `/${EVENTS_URL}`,
  _alternatives: '/alternatives',
}

/** Routes that must never be listed (dynamic route templates and helper pages). */
const EXCLUDED_ROUTES = new Set([
  '/alternatives/[slug]',
  '/partners/[slug]',
  '/case-studies/[slug]',
  '/customers/[slug]',
  '/events/[slug]',
  '/features/[slug]',
  '/blog/categories/[category]',
  '/partners/experts/[slug]',
  '/partners/integrations/[slug]',
  '/launch-week/ticket-image',
  '/launch-week/tickets/[username]',
  '/changelog/[slug]',
])

/**
 * Replaces the source directory at the start of a globbed path with its URL prefix.
 * Matching is anchored to the start of the path so that a filename which merely
 * contains a directory name (e.g. `about-pages`, `my_blog`) is left untouched.
 *
 * @param {string} page - Path as returned by globby, e.g. `pages/pricing.tsx`.
 * @returns {string} The path with its source root swapped for a leading-slash prefix.
 */
function mapSourceRoot(page) {
  for (const [dir, urlPrefix] of Object.entries(CONTENT_DIR_ROUTES)) {
    if (page.startsWith(`${dir}/`)) return urlPrefix + page.slice(dir.length)
  }
  return page.replace(/^(?:\.next\/server\/pages|pages)(?=\/)/, '')
}

/**
 * Removes the leading date portion of the slug that follows `/{section}/`.
 * The strip is unconditional: the first DATE_PREFIX_LENGTH characters after the
 * section prefix are dropped whether or not they look like a date.
 *
 * @param {string} route - Route containing `/{section}/`.
 * @param {string} section - URL segment, e.g. `blog`.
 * @returns {string} `/{section}/` followed by the slug without its first 11 characters.
 */
function dropDatePrefix(route, section) {
  const sectionPrefix = `/${section}/`
  // remove the directory, cut the date off the file name, then reconstruct the route
  const fileSlug = route.replace(sectionPrefix, '')
  return sectionPrefix + fileSlug.substring(DATE_PREFIX_LENGTH)
}

/**
 * Converts one globbed file path into the site route it is served at.
 *
 * @param {string} page - Path as returned by globby.
 * @returns {string | null} The route (`''` for the home page), or `null` when
 *   the file must not appear in the sitemap.
 */
function pageToRoute(page) {
  let route = mapSourceRoot(page)
    // Strip only the trailing extension. `.js` is included because `pages/*.js` is globbed.
    .replace(/\.(?:html|tsx|mdx|js)$/, '')
    // `/index` -> '' and `/{directory}/index` -> `/{directory}`; anchored so that
    // slugs which merely start with "index" (e.g. `/customers/index-ventures`) survive.
    .replace(/\/index$/, '')

  if (EXCLUDED_ROUTES.has(route)) return null

  // Blog based urls: handle removal of dates in filename
  if (route.includes(`/${BLOG_URL}/`)) {
    route = dropDatePrefix(route, BLOG_URL)
  }

  // Event based urls: handle removal of dates in filename
  if (route.includes(`/${EVENTS_URL}/`)) {
    // remove filenames with __
    if (route.includes('__')) return null
    route = dropDatePrefix(route, EVENTS_URL)
  }

  return route
}

/**
 * Renders a single `<url>` sitemap entry.
 *
 * @param {string} loc - Absolute URL placed inside `<loc>`.
 * @returns {string} XML fragment for the entry.
 */
function renderUrlEntry(loc) {
  return `
<url>
<loc>${loc}</loc>
<changefreq>weekly</changefreq>
<priority>0.5</priority>
</url>
`
}

/**
 * Collects changelog detail URLs from the generated changelog RSS feed.
 * Changelog detail pages are dynamic routes, so they are taken from the feed's
 * `<link>` elements instead of the file system.
 *
 * @returns {string[]} Unique changelog URLs in feed order; empty when the feed
 *   cannot be read.
 */
function readChangelogUrls() {
  let rss
  try {
    rss = readFileSync('public/changelog-rss.xml', 'utf-8')
  } catch (error) {
    // A missing feed is expected (best effort); anything else is worth surfacing.
    if (error.code !== 'ENOENT') {
      console.warn(
        `Skipping changelog URLs: could not read public/changelog-rss.xml (${error.message})`
      )
    }
    return []
  }

  const links = rss.matchAll(/<link>(https:\/\/supabase\.com\/changelog\/\d+[^<]*)<\/link>/g)
  return [...new Set([...links].map((match) => match[1]))]
}

/**
 * Generates the site's sitemaps and writes them to `public/`:
 * - `public/sitemap.xml`: sitemap index pointing to the www and docs sitemaps
 * - `public/sitemap_www.xml`: prettier-formatted list of www page URLs
 *
 * @returns {Promise<void>} Resolves once both files have been written.
 */
async function generate() {
  const prettierConfig = await prettier.resolveConfig('./.prettierrc.js')

  const unsortedPages = await globby(PAGE_GLOBS)
  const pages = unsortedPages.sort((a, b) => a.localeCompare(b))

  // Generate URLs for static pages. Compare against null explicitly: the home
  // page route is the empty string and must be kept.
  const staticUrls = pages
    .map((page) => pageToRoute(page))
    .filter((route) => route !== null)
    .map((route) => renderUrlEntry(`${SITE_ORIGIN}${route}`))

  const changelogDetailUrls = readChangelogUrls().map((url) => renderUrlEntry(url))

  const sitemap = `
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${[...staticUrls, ...changelogDetailUrls].join('')}
</urlset>
`

  const formatted = await prettier.format(sitemap, {
    ...prettierConfig,
    parser: 'html',
  })

  /**
   * generate sitemap router
   *
   * this points to www and docs sitemaps
   */
  const sitemapRouter = `<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://supabase.com/sitemap_www.xml</loc>
</sitemap>
<sitemap>
<loc>https://supabase.com/docs/sitemap.xml</loc>
</sitemap>
</sitemapindex>
`

  /**
   * write sitemaps
   */
  writeFileSync('public/sitemap.xml', sitemapRouter)
  writeFileSync('public/sitemap_www.xml', formatted)
}
|
|
|
|
// Run the generator. Handle rejections explicitly so a failed run is logged and
// the process exits non-zero instead of leaving the promise floating.
generate().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
|