mirror of
https://github.com/supabase/supabase.git
synced 2026-06-20 14:26:06 +08:00
fix(docs): negotiate /guides/* markdown via shared helper (#45432)
## Summary
This brings docs `/guides/*` to full content negotiation for AI agents
(GROWTH-811):
RFC 9110 q-value parsing instead of a `.includes('text/markdown')`
substring match,
a 406 when the client rejects every type the route can produce, and
markdown rewrites
for known LLM user agents.
I implemented it by extracting the negotiation into a shared
`common/markdown-negotiation`
module consumed by both `apps/docs/middleware.ts` and
`apps/www/middleware.ts`, rather than
duplicating the helpers into docs and keeping them in sync by hand with
www (#45394). Single
source of truth, no re-sync burden. www is refactored onto the shared
helper with no behavior
change.
## Changes
### docs `/guides/*` content negotiation (GROWTH-811)
- Replace the `.includes('text/markdown')` substring match with RFC 9110
q-value parsing.
- Return 406 (`Cache-Control: no-store`, `Vary: Accept`) when Accept
excludes every type the
route serves. Bypassed for LLM user agents, the `.md` suffix, and
clients sending no Accept.
- Rewrite to `/api/guides-md/<slug>` for LLM user agents (Claude-User,
Claude-Web, ChatGPT-User,
PerplexityBot) regardless of Accept.
- Preserve the existing `.md` suffix routing and the entire
`/reference/*` block.
### Shared negotiation helper
- New `packages/common/markdown-negotiation.ts`:
`negotiateMarkdown(signals, route)` returns
`'markdown' | 'not-acceptable' | 'pass'`. Internalizes q-value parsing,
the LLM user-agent
match, the UA-length cap, and the markdown-vs-html preference.
- `apps/www/middleware.ts`: refactored to consume the shared helper; its
duplicated copy of the
negotiation helpers (added in #45394) is removed. `.md` early-return,
changelog routing, and
first-referrer cookie stamping are unchanged (no behavior change,
covered by its existing tests).
### Tests
- New `apps/docs/middleware.test.ts`: q-value priority, the 406 path,
`.md` suffix, LLM UA
override, browser default Accept, training-crawler and substring-embed
exclusion, and the
`/reference/*` exemption.
- New `packages/common/markdown-negotiation.test.ts`: the same decision
matrix at the unit level
(q-values, 406, LLM UAs, `.md`, `*/*`, training crawlers, OWS,
out-of-range q).
## Testing (Vercel preview)
After Vercel posts a preview URL, save its hostname once (replace `PREVIEW_HOST` below with it), then run the probe set.
```bash
echo 'PREVIEW_HOST' > /tmp/growth-811-host.txt
HOST=$(cat /tmp/growth-811-host.txt)
# 1) Browser-style Accept -> HTML 200
curl -sI -A "Mozilla/5.0" \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' \
"https://$HOST/docs/guides/auth"
# 2) Accept: text/markdown -> markdown 200
curl -sI -H 'Accept: text/markdown' "https://$HOST/docs/guides/auth"
# 3) text/html;q=1.0, text/markdown;q=0.5 -> HTML 200
curl -sI -H 'Accept: text/html;q=1.0, text/markdown;q=0.5' "https://$HOST/docs/guides/auth"
# 4) unsupported Accept -> 406 + Cache-Control: no-store + Vary: Accept
curl -sI -H 'Accept: application/x-content-negotiation-probe' "https://$HOST/docs/guides/auth"
# 5) User-Agent: Claude-User/1.0 (any Accept) -> markdown 200
curl -sI -A 'Claude-User/1.0' "https://$HOST/docs/guides/auth"
```
### After merge
Run
[acceptmarkdown.com/readiness-check](https://acceptmarkdown.com/readiness-check)
against `https://supabase.com/docs/guides/auth`: expect 100/100.
## Linear
- fixes GROWTH-811
This commit is contained in:
150
apps/docs/middleware.test.ts
Normal file
150
apps/docs/middleware.test.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
import { NextRequest } from 'next/server'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { middleware } from './middleware'
|
||||
|
||||
// BASE_PATH defaults to '/docs' when NEXT_PUBLIC_BASE_PATH is unset, so test
// paths include the /docs prefix to match the middleware's GUIDES_PATH check.

/**
 * Builds a NextRequest for `path` on https://supabase.com.
 *
 * @param path - Request path, resolved against the https://supabase.com origin.
 * @param options - Optional `accept` and `userAgent` header values. A header is
 *   only set when its value is provided; an explicitly passed empty string is
 *   sent as-is instead of being silently dropped.
 * @returns The request to hand to `middleware`.
 */
function makeRequest(
  path: string,
  { accept, userAgent }: { accept?: string; userAgent?: string } = {}
): NextRequest {
  const headers: Record<string, string> = {}
  if (accept !== undefined) headers.accept = accept
  if (userAgent !== undefined) headers['user-agent'] = userAgent
  return new NextRequest(new URL(path, 'https://supabase.com'), { headers })
}
|
||||
|
||||
// Response header the assertions read to find the rewrite target.
const REWRITE_HEADER = 'x-middleware-rewrite'

// Absolute URL the tests expect a markdown rewrite for `slug` to point at.
const GUIDES_MD_REWRITE = (slug: string): string =>
  `https://supabase.com/docs/api/guides-md/${slug}`
|
||||
|
||||
// End-to-end checks of the docs middleware for /guides/*: a markdown decision
// shows up as a rewrite header pointing at /api/guides-md/<slug>, an HTML
// decision as no rewrite header at all, and a rejected Accept as a 406.
describe('docs middleware — /guides/* content negotiation', () => {
  it('rewrites to /api/guides-md/<slug> when Accept includes text/markdown', () => {
    const req = makeRequest('/docs/guides/auth', { accept: 'text/markdown' })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  it('rewrites /<slug>.md to /api/guides-md/<slug> regardless of Accept', () => {
    for (const accept of [undefined, 'text/html']) {
      const req = makeRequest('/docs/guides/auth.md', accept ? { accept } : {})
      expect(middleware(req).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
    }
  })

  it('serves HTML for browser-style Accept', () => {
    const req = makeRequest('/docs/guides/auth', {
      accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBeNull()
  })

  it('serves markdown when md q-value beats html', () => {
    const req = makeRequest('/docs/guides/auth', {
      accept: 'text/html;q=0.5, text/markdown;q=1.0',
    })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  it('falls through to HTML when no Accept header is sent', () => {
    expect(middleware(makeRequest('/docs/guides/auth')).headers.get(REWRITE_HEADER)).toBeNull()
  })

  it('falls through to HTML for bare Accept: */* (no explicit md preference)', () => {
    const req = makeRequest('/docs/guides/auth', { accept: '*/*' })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBeNull()
  })

  it('does not serve markdown when client explicitly rejects it (q=0)', () => {
    const req = makeRequest('/docs/guides/auth', {
      accept: 'text/markdown;q=0, text/html;q=1.0',
    })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBeNull()
  })

  // Out-of-range weights are ignored (not clamped): the entry falls back to
  // the default weight of 1.0.
  it('tolerates OWS in q-params and ignores out-of-range q-values', () => {
    // OWS around q: html wins.
    const ows = makeRequest('/docs/guides/auth', {
      accept: 'text/html ; q = 1.0, text/markdown ; q = 0.5',
    })
    expect(middleware(ows).headers.get(REWRITE_HEADER)).toBeNull()

    // q=2.0 is out-of-range, falls back to default 1.0; tie -> markdown.
    const oor = makeRequest('/docs/guides/auth', {
      accept: 'text/html;q=2.0, text/markdown;q=1.0',
    })
    expect(middleware(oor).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  it('returns 406 with Cache-Control: no-store and Vary: Accept when Accept excludes every type', () => {
    const req = makeRequest('/docs/guides/auth', {
      accept: 'application/x-content-negotiation-probe',
    })
    const res = middleware(req)
    expect(res.status).toBe(406)
    expect(res.headers.get('Cache-Control')).toBe('no-store')
    expect(res.headers.get('Vary')).toBe('Accept')
  })

  it('does not 406 for LLM UAs or .md suffix paths even with a probe Accept', () => {
    // Run the middleware once per request and assert on that single response.
    const llmRes = middleware(
      makeRequest('/docs/guides/auth', {
        accept: 'application/x-content-negotiation-probe',
        userAgent: 'Claude-User/1.0',
      })
    )
    expect(llmRes.status).not.toBe(406)
    expect(llmRes.headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))

    const mdRes = middleware(
      makeRequest('/docs/guides/auth.md', {
        accept: 'application/x-content-negotiation-probe',
      })
    )
    expect(mdRes.status).not.toBe(406)
    expect(mdRes.headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  it('does not 406 on /reference/* (negotiation contract is /guides/* only)', () => {
    const req = makeRequest('/docs/reference/javascript/introduction', {
      accept: 'application/x-content-negotiation-probe',
    })
    expect(middleware(req).status).not.toBe(406)
  })

  // One test per user agent so a failure names the offending UA.
  it.each([
    'Claude-User (claude-code/2.1.119; +https://support.anthropic.com/)',
    'Claude-Web/1.0',
    'Mozilla/5.0 (compatible; ChatGPT-User/1.0)',
    'PerplexityBot/1.0',
  ])('rewrites for LLM user agent %s', (ua) => {
    const req = makeRequest('/docs/guides/auth', { userAgent: ua })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  it('LLM UA overrides an Accept header that prefers HTML', () => {
    const req = makeRequest('/docs/guides/auth', {
      accept: 'text/html;q=1.0, text/markdown;q=0.1',
      userAgent: 'Claude-User/1.0',
    })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBe(GUIDES_MD_REWRITE('auth'))
  })

  // Browsers, training crawlers, and UAs that merely embed an LLM agent name
  // as a substring must all keep getting HTML.
  it.each([
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
    'GPTBot/1.0',
    'ClaudeBot/1.0',
    'CCBot/2.0',
    'chatgpt-userscript/2.0',
    'NotPerplexityBot',
  ])('falls through for non-LLM user agent %s', (ua) => {
    const req = makeRequest('/docs/guides/auth', { userAgent: ua })
    expect(middleware(req).headers.get(REWRITE_HEADER)).toBeNull()
  })

  it('does not apply LLM UA rewrite to /reference/* (guides-only)', () => {
    const req = makeRequest('/docs/reference/javascript/introduction', {
      userAgent: 'Claude-User/1.0',
    })
    const rewrite = middleware(req).headers.get(REWRITE_HEADER) ?? ''
    expect(rewrite).not.toContain('/api/guides-md/')
  })
})
|
||||
@@ -1,33 +1,47 @@
|
||||
import { clientSdkIds } from '~/content/navigation.references'
|
||||
import { BASE_PATH } from '~/lib/constants'
|
||||
import { negotiateMarkdown } from 'common/markdown-negotiation'
|
||||
import { isbot } from 'isbot'
|
||||
import { NextResponse, type NextRequest } from 'next/server'
|
||||
|
||||
import { clientSdkIds } from '~/content/navigation.references'
|
||||
import { BASE_PATH } from '~/lib/constants'
|
||||
|
||||
const REFERENCE_PATH = `${BASE_PATH ?? ''}/reference`
|
||||
|
||||
const GUIDES_PATH = `${BASE_PATH ?? ''}/guides`
|
||||
|
||||
export function middleware(request: NextRequest) {
|
||||
const url = new URL(request.url)
|
||||
const { pathname } = url
|
||||
|
||||
const requestsMarkdown =
|
||||
request.headers.get('Accept')?.includes('text/markdown') || url.pathname.endsWith('.md')
|
||||
if (pathname.startsWith(GUIDES_PATH + '/')) {
|
||||
const isMdSuffix = pathname.endsWith('.md')
|
||||
const slug = pathname.replace(`${GUIDES_PATH}/`, '').replace(/\.md$/, '')
|
||||
const decision = negotiateMarkdown(
|
||||
{
|
||||
acceptHeader: request.headers.get('accept') ?? '',
|
||||
userAgent: request.headers.get('user-agent') ?? '',
|
||||
},
|
||||
{ hasMarkdownVariant: true, isMarkdownSuffix: isMdSuffix }
|
||||
)
|
||||
|
||||
// Serve pre-generated .md files before the [[...slug]] page route can intercept them
|
||||
if (url.pathname.startsWith(GUIDES_PATH + '/') && requestsMarkdown) {
|
||||
const slug = url.pathname.replace(`${GUIDES_PATH}/`, '').replace(/\.md$/, '')
|
||||
const rewriteUrl = new URL(url)
|
||||
rewriteUrl.pathname = `${BASE_PATH ?? ''}/api/guides-md/${slug}`
|
||||
return NextResponse.rewrite(rewriteUrl)
|
||||
if (decision === 'not-acceptable') {
|
||||
return new NextResponse('Not Acceptable', {
|
||||
status: 406,
|
||||
headers: { 'Cache-Control': 'no-store', Vary: 'Accept' },
|
||||
})
|
||||
}
|
||||
|
||||
if (decision === 'markdown') {
|
||||
const rewriteUrl = new URL(url)
|
||||
rewriteUrl.pathname = `${BASE_PATH ?? ''}/api/guides-md/${slug}`
|
||||
return NextResponse.rewrite(rewriteUrl)
|
||||
}
|
||||
}
|
||||
|
||||
if (!url.pathname.startsWith(REFERENCE_PATH)) {
|
||||
if (!pathname.startsWith(REFERENCE_PATH)) {
|
||||
return NextResponse.next()
|
||||
}
|
||||
|
||||
if (isbot(request.headers.get('user-agent'))) {
|
||||
let [, lib, maybeVersion, ...slug] = url.pathname.replace(REFERENCE_PATH, '').split('/')
|
||||
let [, lib, maybeVersion, ...slug] = pathname.replace(REFERENCE_PATH, '').split('/')
|
||||
|
||||
if (clientSdkIds.includes(lib)) {
|
||||
const version = /v\d+/.test(maybeVersion) ? maybeVersion : undefined
|
||||
@@ -43,7 +57,7 @@ export function middleware(request: NextRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
const [, lib, maybeVersion] = url.pathname.replace(REFERENCE_PATH, '').split('/')
|
||||
const [, lib, maybeVersion] = pathname.replace(REFERENCE_PATH, '').split('/')
|
||||
|
||||
if (lib === 'cli') {
|
||||
const rewritePath = [REFERENCE_PATH, 'cli'].join('/')
|
||||
|
||||
@@ -1,56 +1,9 @@
|
||||
import { stampFirstReferrerCookie } from 'common/first-referrer-cookie'
|
||||
import { negotiateMarkdown } from 'common/markdown-negotiation'
|
||||
import { NextResponse, type NextRequest } from 'next/server'
|
||||
|
||||
import { MD_PAGES } from './app/api-v2/md/content.generated'
|
||||
|
||||
// Live-fetch agents only. Training crawlers (GPTBot, ClaudeBot, CCBot) are
|
||||
// governed by robots.txt; serving them content that differs from the HTML
|
||||
// page risks SEO and cloaking penalties.
|
||||
const LLM_USER_AGENT = /\bClaude-User\b|\bClaude-Web\b|\bChatGPT-User\b|\bPerplexityBot\b/i
|
||||
|
||||
// Media ranges (RFC 9110 §5.3.2) ordered most to least specific.
|
||||
const RANGES = ['text/markdown', 'text/html', 'text/*', '*/*'] as const
|
||||
type Range = (typeof RANGES)[number]
|
||||
|
||||
const Q_PARAM = /^\s*q\s*=\s*([\d.]+)\s*$/i
|
||||
|
||||
function isRange(s: string): s is Range {
|
||||
return (RANGES as readonly string[]).includes(s)
|
||||
}
|
||||
|
||||
function parseQ(params: string[]): number {
|
||||
for (const p of params) {
|
||||
const q = parseFloat(p.match(Q_PARAM)?.[1] ?? '')
|
||||
if (Number.isFinite(q) && q >= 0 && q <= 1) return q
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
// `markdownExplicit` lets the caller avoid flipping a bare `Accept: */*` to
|
||||
// markdown — generic clients sending */* aren't expressing a preference.
|
||||
function parseAccept(header: string) {
|
||||
const seen = new Map<Range, number>()
|
||||
|
||||
for (const entry of header.toLowerCase().split(',')) {
|
||||
const [rawType, ...params] = entry.trim().split(';')
|
||||
const range = rawType.trim()
|
||||
if (!isRange(range)) continue
|
||||
seen.set(range, Math.max(seen.get(range) ?? -1, parseQ(params)))
|
||||
}
|
||||
|
||||
return {
|
||||
html: seen.get('text/html') ?? seen.get('text/*') ?? seen.get('*/*') ?? 0,
|
||||
markdown: seen.get('text/markdown') ?? seen.get('text/*') ?? seen.get('*/*') ?? 0,
|
||||
markdownExplicit: seen.has('text/markdown') || seen.has('text/*'),
|
||||
}
|
||||
}
|
||||
|
||||
function shouldServeMarkdown(accept: ReturnType<typeof parseAccept>): boolean {
|
||||
if (accept.markdown === 0) return false
|
||||
if (accept.markdown > accept.html) return true
|
||||
return accept.markdown === accept.html && accept.markdownExplicit
|
||||
}
|
||||
|
||||
export function middleware(request: NextRequest) {
|
||||
const { pathname } = request.nextUrl
|
||||
|
||||
@@ -61,37 +14,28 @@ export function middleware(request: NextRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
const acceptHeader = request.headers.get('accept') ?? ''
|
||||
// Cap UA length before regex test to bound CPU on the edge hot path.
|
||||
const userAgent = (request.headers.get('user-agent') ?? '').slice(0, 512)
|
||||
const isLlmAgent = LLM_USER_AGENT.test(userAgent)
|
||||
const accept = acceptHeader ? parseAccept(acceptHeader) : null
|
||||
|
||||
// Strip trailing slash so /auth/ and /auth resolve to the same allowlist
|
||||
// entry — NextURL preserves trailing-slash style on rewrite targets.
|
||||
const slug = (pathname === '/' ? 'homepage' : pathname.slice(1)).replace(/\/$/, '')
|
||||
const isMdEligible = MD_PAGES.has(slug)
|
||||
const isChangelogEntry = slug === 'changelog' || /^changelog\/\d+/.test(slug)
|
||||
const hasMdVariant = isMdEligible || isChangelogEntry
|
||||
|
||||
// 406 when Accept rejects every type we can produce. Skip for LLM UAs
|
||||
// (always served markdown) and clients with no Accept (browser default).
|
||||
if (
|
||||
hasMdVariant &&
|
||||
!isLlmAgent &&
|
||||
accept !== null &&
|
||||
accept.markdown === 0 &&
|
||||
accept.html === 0
|
||||
) {
|
||||
const decision = negotiateMarkdown(
|
||||
{
|
||||
acceptHeader: request.headers.get('accept') ?? '',
|
||||
userAgent: request.headers.get('user-agent') ?? '',
|
||||
},
|
||||
{ hasMarkdownVariant: isMdEligible || isChangelogEntry }
|
||||
)
|
||||
|
||||
if (decision === 'not-acceptable') {
|
||||
return new NextResponse('Not Acceptable', {
|
||||
status: 406,
|
||||
headers: { 'Cache-Control': 'no-store', Vary: 'Accept' },
|
||||
})
|
||||
}
|
||||
|
||||
const wantsMarkdown = isLlmAgent || (accept !== null && shouldServeMarkdown(accept))
|
||||
|
||||
if (wantsMarkdown) {
|
||||
if (decision === 'markdown') {
|
||||
if (isMdEligible) {
|
||||
return NextResponse.rewrite(new URL(`/api-v2/md/${slug}`, request.nextUrl))
|
||||
}
|
||||
|
||||
167
packages/common/markdown-negotiation.test.ts
Normal file
167
packages/common/markdown-negotiation.test.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { negotiateMarkdown } from './markdown-negotiation'
|
||||
|
||||
// Accept value shaped like a browser navigation request: HTML first, with a
// lower-weighted `*/*` catch-all and no explicit markdown entry.
const BROWSER_ACCEPT = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'

// Decision matrix for negotiateMarkdown at the unit level.
describe('negotiateMarkdown', () => {
  describe('hasMarkdownVariant gate', () => {
    it('passes when the route has no markdown variant, regardless of other signals', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/markdown', userAgent: 'Claude-User/1.0' },
          { hasMarkdownVariant: false, isMarkdownSuffix: true }
        )
      ).toBe('pass')
    })
  })

  describe('forced markdown', () => {
    it('returns markdown for LLM user agents even when Accept rejects everything', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'application/x-content-negotiation-probe', userAgent: 'Claude-User/1.0' },
          { hasMarkdownVariant: true }
        )
      ).toBe('markdown')
    })

    it('returns markdown for an explicit .md suffix even with an HTML-only Accept', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/html', userAgent: '' },
          { hasMarkdownVariant: true, isMarkdownSuffix: true }
        )
      ).toBe('markdown')
    })

    it.each([
      'Claude-User (claude-code/2.1.119; +https://support.anthropic.com/)',
      'Claude-Web/1.0',
      'Mozilla/5.0 (compatible; ChatGPT-User/1.0)',
      'PerplexityBot/1.0',
    ])('treats %s as an LLM agent', (userAgent) => {
      expect(negotiateMarkdown({ acceptHeader: '', userAgent }, { hasMarkdownVariant: true })).toBe(
        'markdown'
      )
    })

    it.each([
      'GPTBot/1.0',
      'ClaudeBot/1.0',
      'CCBot/2.0',
      'chatgpt-userscript/2.0',
      'NotPerplexityBot',
    ])('does not treat %s (training crawler / substring embed) as an LLM agent', (userAgent) => {
      expect(negotiateMarkdown({ acceptHeader: '', userAgent }, { hasMarkdownVariant: true })).toBe(
        'pass'
      )
    })

    it('caps user-agent length before matching', () => {
      // The agent token sits past the length cap, so it must not be seen.
      const padded = 'x'.repeat(600) + 'Claude-User'
      expect(
        negotiateMarkdown({ acceptHeader: '', userAgent: padded }, { hasMarkdownVariant: true })
      ).toBe('pass')
    })
  })

  describe('no Accept header', () => {
    it('passes (serves HTML) when no Accept header is sent', () => {
      expect(
        negotiateMarkdown({ acceptHeader: '', userAgent: '' }, { hasMarkdownVariant: true })
      ).toBe('pass')
    })
  })

  describe('406 not-acceptable', () => {
    it('returns not-acceptable when Accept excludes every type we serve', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'application/x-content-negotiation-probe', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('not-acceptable')
    })

    it('does not 406 for bare */*', () => {
      expect(
        negotiateMarkdown({ acceptHeader: '*/*', userAgent: '' }, { hasMarkdownVariant: true })
      ).toBe('pass')
    })

    it('lets a more specific range override a wildcard (text/*;q=0 beats */*)', () => {
      // Both html and markdown resolve through text/* before */*, so both are 0.
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/*;q=0, */*', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('not-acceptable')
    })
  })

  describe('q-value negotiation', () => {
    it('serves HTML for browser-style Accept', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: BROWSER_ACCEPT, userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('pass')
    })

    it('serves markdown when explicitly requested', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/markdown', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('markdown')
    })

    it('serves markdown when its q-value beats html', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/html;q=0.5, text/markdown;q=1.0', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('markdown')
    })

    it('serves HTML when its q-value beats markdown', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/html;q=1.0, text/markdown;q=0.5', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('pass')
    })

    it('breaks an explicit md/html tie toward markdown', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/markdown, text/html, */*', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('markdown')
    })

    it('treats text/* as an explicit markdown preference on a tie', () => {
      // Unlike bare */*, a text/* range counts as naming markdown explicitly.
      expect(
        negotiateMarkdown({ acceptHeader: 'text/*', userAgent: '' }, { hasMarkdownVariant: true })
      ).toBe('markdown')
    })

    it('does not serve markdown when the client rejects it (q=0)', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/markdown;q=0, text/html;q=1.0', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('pass')
    })

    it('tolerates OWS around the q parameter (RFC 9110)', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/html ; q = 1.0, text/markdown ; q = 0.5', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('pass')
    })

    it('ignores out-of-range q-values (falls back to 1.0; tie -> markdown)', () => {
      expect(
        negotiateMarkdown(
          { acceptHeader: 'text/html;q=2.0, text/markdown;q=1.0', userAgent: '' },
          { hasMarkdownVariant: true }
        )
      ).toBe('markdown')
    })
  })
})
|
||||
86
packages/common/markdown-negotiation.ts
Normal file
86
packages/common/markdown-negotiation.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
// Live-fetch agents only. Training crawlers (GPTBot, ClaudeBot, CCBot) are
// governed by robots.txt; serving them content that differs from the HTML
// page risks SEO and cloaking penalties.
const LLM_USER_AGENT = /\bClaude-User\b|\bClaude-Web\b|\bChatGPT-User\b|\bPerplexityBot\b/i

// Media ranges (RFC 9110 §12.5.1) ordered most to least specific.
const RANGES = ['text/markdown', 'text/html', 'text/*', '*/*'] as const
type Range = (typeof RANGES)[number]

// A single `q=<number>` parameter; whitespace is tolerated around the name,
// the `=`, and the value.
const Q_PARAM = /^\s*q\s*=\s*([\d.]+)\s*$/i

// Cap UA length before the regex test to bound CPU on the edge hot path.
const MAX_UA_LENGTH = 512

/** Narrows a (lower-cased) media range to one of the ranges this module negotiates. */
function isRange(s: string): s is Range {
  return (RANGES as readonly string[]).includes(s)
}

/**
 * Extracts the weight from a media range's parameters.
 *
 * @param params - The `;`-separated parameters that follow the media range.
 * @returns The first `q` value that parses to a number in [0, 1]. A missing,
 *   unparseable, or out-of-range weight is ignored, giving the default of 1.
 */
function parseQ(params: string[]): number {
  for (const p of params) {
    const q = parseFloat(p.match(Q_PARAM)?.[1] ?? '')
    if (Number.isFinite(q) && q >= 0 && q <= 1) return q
  }
  return 1
}

/** True when the Accept value contains at least one non-empty list element. */
function hasAcceptEntries(header: string): boolean {
  return header.split(',').some((entry) => entry.trim() !== '')
}

/**
 * Resolves the weights an Accept header assigns to HTML and markdown.
 *
 * Only the ranges in `RANGES` are considered; everything else is skipped. When
 * a range is listed more than once, its highest weight is kept. Each type then
 * takes the weight of the most specific range that covers it, or 0 when no
 * listed range does.
 *
 * `markdownExplicit` lets callers avoid flipping a bare `Accept: */*` to
 * markdown — generic clients sending */* aren't expressing a preference.
 */
function parseAccept(header: string) {
  const seen = new Map<Range, number>()

  for (const entry of header.toLowerCase().split(',')) {
    const [rawType = '', ...params] = entry.trim().split(';')
    const range = rawType.trim()
    if (!isRange(range)) continue
    seen.set(range, Math.max(seen.get(range) ?? -1, parseQ(params)))
  }

  return {
    html: seen.get('text/html') ?? seen.get('text/*') ?? seen.get('*/*') ?? 0,
    markdown: seen.get('text/markdown') ?? seen.get('text/*') ?? seen.get('*/*') ?? 0,
    markdownExplicit: seen.has('text/markdown') || seen.has('text/*'),
  }
}

/**
 * Decides between markdown and HTML once both weights are known: markdown must
 * be acceptable (weight above 0) and either strictly preferred, or tied with
 * HTML while having been named explicitly.
 */
function shouldServeMarkdown(accept: ReturnType<typeof parseAccept>): boolean {
  if (accept.markdown === 0) return false
  if (accept.markdown > accept.html) return true
  return accept.markdown === accept.html && accept.markdownExplicit
}

export type MarkdownDecision = 'markdown' | 'not-acceptable' | 'pass'

/**
 * Content negotiation for routes that can serve either HTML or markdown.
 *
 * `hasMarkdownVariant` is false for paths with no markdown representation (they
 * never negotiate). `isMarkdownSuffix` forces markdown for an explicit `.md`
 * request; callers that handle `.md` upstream can leave it false.
 *
 * @param signals - The raw `Accept` and `User-Agent` header values; pass `''`
 *   for a header that was not sent.
 * @param route - What the route can serve and how it was addressed.
 * @returns `'markdown'` to serve the markdown variant, `'not-acceptable'` when
 *   the client's Accept rules out both HTML and markdown, and `'pass'` to let
 *   the request continue to the HTML page.
 */
export function negotiateMarkdown(
  { acceptHeader, userAgent }: { acceptHeader: string; userAgent: string },
  {
    hasMarkdownVariant,
    isMarkdownSuffix = false,
  }: { hasMarkdownVariant: boolean; isMarkdownSuffix?: boolean }
): MarkdownDecision {
  if (!hasMarkdownVariant) return 'pass'

  // LLM agents and an explicit `.md` request always get markdown.
  if (isMarkdownSuffix || LLM_USER_AGENT.test(userAgent.slice(0, MAX_UA_LENGTH))) {
    return 'markdown'
  }

  // No Accept header = browser/default client: serve HTML, never 406. A value
  // made up only of whitespace or empty list elements states no preference
  // either, so it is treated the same way instead of being rejected.
  if (!hasAcceptEntries(acceptHeader)) return 'pass'

  const accept = parseAccept(acceptHeader)

  // 406 when Accept rejects every type this route can produce. Only reached for
  // non-LLM, non-`.md` clients that sent an Accept header (guards above), so a
  // deliberate `Accept: application/json` gets a clean 406 instead of HTML.
  if (accept.markdown === 0 && accept.html === 0) return 'not-acceptable'

  return shouldServeMarkdown(accept) ? 'markdown' : 'pass'
}
|
||||
Reference in New Issue
Block a user