mirror of
https://github.com/supabase/supabase.git
synced 2026-05-30 17:32:00 +08:00
## Problem

GitHub OAuth redirects and Google SSO set the browser's Referer header to their domain when redirecting back to supabase.com. Our attribution pipeline treats these as genuine referral traffic, inflating the `github` channel by ~20K orgs/week.

The internal referrer fix (GROWTH-647) surfaced this by reducing `unknown-internal` — it didn't cause the issue, it revealed OAuth noise that was previously hidden.

## What happened

When users sign in with GitHub, the browser sends `Referer: https://github.com/`. GitHub's login pages use `origin-when-cross-origin` Referrer-Policy, which strips the path. So OAuth redirects arrive as bare `github.com/` — indistinguishable from a direct visit to github.com.

Meanwhile, genuine GitHub referrals from repos/READMEs always include the full path because those pages use `no-referrer-when-downgrade`.

We validated against `mart_marketing_organization_attribution`: 98.5% of GitHub-attributed orgs have bare `github.com/` as the referrer. Only ~250/week have specific paths (genuine referrals).

## Changes

- Added `isOAuthRedirectReferrer()` to `first-referrer-cookie.ts` — identifies auth provider redirects:
  - `accounts.google.com` blocked entirely (dedicated SSO subdomain)
  - Bare `github.com/` blocked (OAuth redirect signature)
  - `github.com/<specific-path>` preserved (genuine repo/README referrals)
- Wired into `shouldRefreshCookie()` so OAuth referrers never get stamped into cookies
- Wired into `handlePageTelemetry()` referrer overrides as defense-in-depth
- 17 new tests covering all OAuth patterns and edge cases

## Testing

All 52 tests pass. New tests cover Google SSO (bare + with path), GitHub bare domain (with/without trailing slash), genuine GitHub referrals (repo, README, discussion, blob), explicit OAuth path, non-OAuth domains, empty/malformed URLs.

Verified TDD — tests failed red before implementation, green after.

Companion dbt PR in data-engineering handles historical data.

GROWTH-732
406 lines
13 KiB
TypeScript
406 lines
13 KiB
TypeScript
/**
 * Shared utilities for the cross-app first-referrer handoff cookie.
 *
 * The `_sb_first_referrer` cookie is written by edge middleware on `apps/www`,
 * `apps/docs`, and `apps/studio` when a user arrives from an
 * external source. Studio reads it on the first telemetry pageview to recover
 * external attribution context that would otherwise be lost at the app boundary.
 *
 * The cookie is normally write-once (365-day TTL, domain=supabase.com), but is
 * refreshed when a returning visitor arrives with paid traffic signals (click IDs
 * or paid UTM medium values) to ensure paid attribution overrides stale organic data.
 */
|
|
|
|
// ---------------------------------------------------------------------------
// Structural types for Next.js middleware request/response
// ---------------------------------------------------------------------------
// Using structural interfaces instead of importing NextRequest/NextResponse
// avoids version conflicts when different apps pin different Next.js versions
// (e.g. studio on Next 15, docs/www on Next 16).
|
|
|
|
/**
 * Minimal structural view of an incoming middleware request.
 *
 * Only the members this module actually reads are declared, so any request
 * object exposing them can be passed in.
 */
interface MiddlewareRequest {
  /** Header lookup; yields `null` or the header value as a string. */
  headers: {
    get(name: string): string | null
  }
  /** Cookie jar; only a presence check is required here. */
  cookies: {
    has(name: string): boolean
  }
  /** The request URL as a string. */
  url: string
  /** Parsed URL details; only `hostname` is read. */
  nextUrl: {
    hostname: string
  }
}
|
|
|
|
/**
 * Minimal structural view of an outgoing middleware response.
 *
 * Only `cookies.set` is declared, together with the cookie attributes this
 * module may pass to it.
 */
interface MiddlewareResponse {
  cookies: {
    /**
     * Write a cookie on the response.
     *
     * @param name - Cookie name.
     * @param value - Cookie value.
     * @param options - Optional cookie attributes.
     */
    set(
      name: string,
      value: string,
      options?: {
        path?: string
        sameSite?: 'lax' | 'strict' | 'none'
        secure?: boolean
        domain?: string
        /** Lifetime of the cookie (this module passes a value in seconds). */
        maxAge?: number
      }
    ): void
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Constants
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Name of the cross-app first-referrer handoff cookie. */
export const FIRST_REFERRER_COOKIE_NAME = '_sb_first_referrer'

/**
 * Name of the short-lived (60s) diagnostic cookie written by www middleware
 * on /dashboard and /docs paths.
 *
 * Its value encodes: hit=1&would_stamp={0|1}&has_cookie={0|1}
 *
 * Studio telemetry reads it to report middleware reach and attribution
 * signals to PostHog.
 */
export const MW_DIAG_COOKIE_NAME = '_sb_mw_diag'

/** Lifetime of the first-referrer cookie: 365 days, expressed in seconds. */
export const FIRST_REFERRER_COOKIE_MAX_AGE = 60 * 60 * 24 * 365
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Types
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Payload stored (as JSON) in the first-referrer cookie. */
export interface FirstReferrerData {
  /** External referrer URL, e.g. https://www.google.com/ */
  referrer: string

  /** URL on our site where the visitor landed when the referrer was captured. */
  landing_url: string

  /** UTM parameters found on the landing URL (utm_source, utm_medium, ...). */
  utms: Record<string, string>

  /** Ad-network click IDs found on the landing URL. */
  click_ids: Record<string, string>

  /** When the cookie was written, as a Unix timestamp in milliseconds. */
  ts: number
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Referrer classification
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Tells whether a referrer URL belongs to a host outside Supabase.
 *
 * A host counts as internal when it is exactly `supabase.com` or any
 * subdomain of it.
 *
 * @param referrer - Raw referrer URL; may be empty.
 * @returns `true` for a parseable URL on a non-Supabase host; `false` for an
 *   empty string, an unparseable URL, or a Supabase host.
 */
export function isExternalReferrer(referrer: string): boolean {
  if (!referrer) return false

  let host: string
  try {
    host = new URL(referrer).hostname
  } catch {
    // Malformed referrers are never treated as external traffic.
    return false
  }

  const isSupabaseHost = host === 'supabase.com' || host.endsWith('.supabase.com')
  return !isSupabaseHost
}
|
|
|
|
/**
 * Tells whether a referrer URL comes from an OAuth/SSO redirect rather than
 * from a genuine traffic source.
 *
 * A referrer should describe how someone discovered Supabase, not how they
 * authenticated. The following are classified as auth-provider redirects:
 *  - `accounts.google.com` — always (dedicated SSO subdomain)
 *  - `github.com` with no path (bare domain) — OAuth strips the path via the
 *    origin-when-cross-origin Referrer-Policy
 *  - `github.com/login/oauth...` — explicit OAuth path (rare)
 *
 * Any other `github.com` path is a genuine referral (repo, README,
 * discussion, ...) and is not flagged.
 *
 * @param referrer - Raw referrer URL; may be empty.
 * @returns `true` only for the auth redirects listed above; `false` for
 *   everything else, including empty or malformed input.
 */
export function isOAuthRedirectReferrer(referrer: string): boolean {
  if (!referrer) return false

  let parsed: URL
  try {
    parsed = new URL(referrer)
  } catch {
    return false
  }

  switch (parsed.hostname) {
    case 'accounts.google.com':
      // Google SSO — the entire subdomain is auth traffic.
      return true
    case 'github.com':
      // Bare domain is OAuth redirect noise; the explicit OAuth path is rare
      // because GitHub usually strips it. Everything else is a real referral.
      return parsed.pathname === '/' || parsed.pathname.startsWith('/login/oauth')
    default:
      return false
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// UTM + click-ID extraction
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** UTM query parameters captured from the landing URL. */
const UTM_KEYS = [
  'utm_source',
  'utm_medium',
  'utm_campaign',
  'utm_content',
  'utm_term',
] as const

/** Ad-network click-ID query parameters captured from the landing URL. */
const CLICK_ID_KEYS = [
  // Google Ads
  'gclid',
  // Google Ads (iOS)
  'gbraid',
  // Google Ads (iOS)
  'wbraid',
  // Microsoft Ads (Bing)
  'msclkid',
  // Meta (Facebook/Instagram)
  'fbclid',
  // Reddit Ads
  'rdt_cid',
  // TikTok Ads
  'ttclid',
  // X Ads (Twitter)
  'twclid',
  // LinkedIn Ads
  'li_fat_id',
] as const
|
|
|
|
/**
 * Collect the requested query parameters that are present with a non-empty value.
 *
 * @param searchParams - Query parameters to read from.
 * @param keys - Parameter names to look up, in the order they should appear
 *   in the result.
 * @returns A record of name to first value; names that are missing or have an
 *   empty value are left out.
 */
function pickParams(
  searchParams: URLSearchParams,
  keys: readonly string[]
): Record<string, string> {
  return keys.reduce<Record<string, string>>((picked, name) => {
    const found = searchParams.get(name)
    // `get` returns null when absent; empty strings are skipped as well.
    if (found) picked[name] = found
    return picked
  }, {})
}
|
|
|
|
/**
 * Coerce an untrusted value into a `Record<string, string>`.
 *
 * Used on data parsed from a cookie, where the shape cannot be trusted.
 *
 * @param value - Any value, typically the result of `JSON.parse`.
 * @returns A new record holding only the entries of `value` whose values are
 *   strings. Returns an empty record when `value` is not a plain keyed object:
 *   `null`, a primitive, or an array. Arrays are rejected because their
 *   numeric indices would otherwise surface as bogus keys such as `"0"`.
 */
function toStringRecord(value: unknown): Record<string, string> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) return {}

  // Object.entries always yields string keys, so only the values need checking.
  const stringEntries = Object.entries(value).filter(
    (entry): entry is [string, string] => typeof entry[1] === 'string'
  )
  return Object.fromEntries(stringEntries)
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Build cookie payload from a request (edge-compatible)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Assemble a `FirstReferrerData` payload from raw request values.
 *
 * Intended for use in Next.js middleware, where `document` is not available.
 *
 * @param referrer - Referrer URL to record, stored as given.
 * @param landingUrl - Landing URL to record, stored as given. UTM parameters
 *   and click IDs are extracted from its query string.
 * @returns The payload, timestamped with the current time. When `landingUrl`
 *   cannot be parsed, `utms` and `click_ids` are empty.
 */
export function buildFirstReferrerData({
  referrer,
  landingUrl,
}: {
  referrer: string
  landingUrl: string
}): FirstReferrerData {
  let query: URLSearchParams | null = null
  try {
    query = new URL(landingUrl).searchParams
  } catch {
    // Malformed landing URL: skip param extraction and keep both records empty.
  }

  return {
    referrer,
    landing_url: landingUrl,
    utms: query ? pickParams(query, UTM_KEYS) : {},
    click_ids: query ? pickParams(query, CLICK_ID_KEYS) : {},
    ts: Date.now(),
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Serialize / parse
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Turn the payload into the string stored as the cookie value.
 *
 * The output is plain JSON; no URL-encoding is applied here.
 *
 * @param data - Payload to serialize.
 * @returns The JSON text for `data`.
 */
export function serializeFirstReferrerCookie(data: FirstReferrerData): string {
  const json = JSON.stringify(data)
  return json
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Paid-signal detection
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** `utm_medium` values (lowercase) that mark a visit as paid traffic. */
const PAID_UTM_MEDIUMS = new Set<string>([
  // Search
  'cpc',
  'ppc',
  'paid_search',
  // Social
  'paidsocial',
  'paid_social',
  // Display
  'display',
])
|
|
|
|
/**
 * Detects paid-campaign markers on a URL.
 *
 * A URL carries a paid signal when its query string contains any ad-network
 * click-ID parameter, or when its `utm_medium` (compared case-insensitively)
 * is one of the paid medium values. Such visits should override stale organic
 * attribution.
 *
 * @param url - Parsed URL to inspect.
 * @returns `true` when a click ID or a paid `utm_medium` is present.
 */
export function hasPaidSignals(url: URL): boolean {
  const params = url.searchParams

  // Presence of the parameter is enough; its value is not inspected.
  if (CLICK_ID_KEYS.some((clickId) => params.has(clickId))) return true

  const medium = params.get('utm_medium')
  if (medium === null) return false
  return PAID_UTM_MEDIUMS.has(medium.toLowerCase())
}
|
|
|
|
/**
 * Decides whether the first-referrer cookie should be (re-)stamped.
 *
 *  - Cookie exists + paid signals in URL → stamp (paid traffic refresh).
 *    The OAuth check is intentionally skipped here: the paid signal comes
 *    from the URL (gclid, utm_medium=cpc), not from the referrer.
 *  - Cookie exists, no paid signals or unparseable URL → skip
 *  - No cookie + OAuth/SSO redirect referrer → skip (auth ≠ discovery)
 *  - No cookie + external referrer → stamp (first visit attribution)
 *  - No cookie otherwise → skip
 *
 * @param existingCookie - Whether the request already carries the cookie.
 * @param request - The request's referrer and URL, both as raw strings.
 * @returns An object whose `stamp` flag tells the caller whether to write the cookie.
 */
export function shouldRefreshCookie(
  existingCookie: boolean,
  request: { referrer: string; url: string }
): { stamp: boolean } {
  if (existingCookie) {
    let landing: URL
    try {
      landing = new URL(request.url)
    } catch {
      return { stamp: false }
    }
    return { stamp: hasPaidSignals(landing) }
  }

  const stamp = !isOAuthRedirectReferrer(request.referrer) && isExternalReferrer(request.referrer)
  return { stamp }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Middleware helper — shared across apps/www, apps/docs, and apps/studio
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Writes the first-referrer cookie onto a Next.js middleware response when
 * the request warrants it. This is the single entry point for all app
 * middleware files — pass the incoming request and the outgoing response.
 *
 * On *.supabase.com the cookie is set with `domain=supabase.com` (and
 * `secure`) so it is readable across all subdomains (www, docs, studio). On
 * other hosts (localhost, preview deploys) the domain is left unset so the
 * browser stores a host-only cookie instead of rejecting an invalid domain.
 *
 * @param request - Incoming request; its `referer` header, cookies, URL and
 *   hostname are read.
 * @param response - Outgoing response; the cookie is set on it, or it is left
 *   untouched when no stamp is needed.
 */
export function stampFirstReferrerCookie(
  request: MiddlewareRequest,
  response: MiddlewareResponse
): void {
  // A missing Referer header is treated as an empty referrer.
  const referrer = request.headers.get('referer') ?? ''
  const hasCookie = request.cookies.has(FIRST_REFERRER_COOKIE_NAME)

  const decision = shouldRefreshCookie(hasCookie, { referrer, url: request.url })
  if (!decision.stamp) return

  const payload = buildFirstReferrerData({ referrer, landingUrl: request.url })

  const host = request.nextUrl.hostname
  const onSupabaseDomain = host === 'supabase.com' || host.endsWith('.supabase.com')
  const crossSubdomainOptions = onSupabaseDomain ? { domain: 'supabase.com', secure: true } : {}

  response.cookies.set(FIRST_REFERRER_COOKIE_NAME, serializeFirstReferrerCookie(payload), {
    path: '/',
    sameSite: 'lax',
    ...crossSubdomainOptions,
    maxAge: FIRST_REFERRER_COOKIE_MAX_AGE,
  })
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Middleware diagnostic cookie — parse (client-side)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** Decoded contents of the middleware diagnostic cookie. */
export interface MwDiagData {
  /** Parsed from the cookie's `hit` field. */
  hit: boolean

  /** Parsed from the cookie's `would_stamp` field. */
  would_stamp: boolean

  /** Parsed from the cookie's `has_cookie` field. */
  has_existing_cookie: boolean
}
|
|
|
|
/**
 * Reads the short-lived middleware diagnostic cookie (written by www
 * middleware on /dashboard and /docs paths) out of a cookie header string.
 *
 * @param cookieHeader - A `;`-separated cookie string.
 * @returns The decoded flags, or `null` when the cookie is absent, its `hit`
 *   field is not `1`, or anything fails while decoding it.
 */
export function parseMwDiagCookie(cookieHeader: string): MwDiagData | null {
  const prefix = `${MW_DIAG_COOKIE_NAME}=`

  try {
    const entry = cookieHeader
      .split(';')
      .map((part) => part.trim())
      .find((part) => part.startsWith(prefix))
    if (entry === undefined) return null

    // The value is a URL-encoded query string: hit=1&would_stamp=..&has_cookie=..
    const fields = new URLSearchParams(decodeURIComponent(entry.slice(prefix.length)))
    if (fields.get('hit') !== '1') return null

    return {
      hit: true,
      would_stamp: fields.get('would_stamp') === '1',
      has_existing_cookie: fields.get('has_cookie') === '1',
    }
  } catch {
    return null
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Parse cookie from document.cookie header (client-side)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Reads and validates the first-referrer cookie from a cookie header string
 * (e.g. `document.cookie`).
 *
 * @param cookieHeader - A `;`-separated cookie string.
 * @returns The decoded payload, or `null` when the cookie is absent, cannot be
 *   decoded or parsed, or lacks a string `referrer` / `landing_url`. Non-string
 *   entries in `utms` / `click_ids` are dropped, and a missing or non-finite
 *   `ts` is replaced with the current time.
 */
export function parseFirstReferrerCookie(cookieHeader: string): FirstReferrerData | null {
  const prefix = `${FIRST_REFERRER_COOKIE_NAME}=`

  try {
    const entry = cookieHeader
      .split(';')
      .map((part) => part.trim())
      .find((part) => part.startsWith(prefix))
    if (entry === undefined) return null

    const decoded = decodeURIComponent(entry.slice(prefix.length))

    // Handle double-encoded cookies from before the serializer fix.
    // Next.js cookies.set() encodes automatically, but serializeFirstReferrerCookie
    // previously called encodeURIComponent too, producing double-encoded values.
    // If the once-decoded text is not JSON, decode a second time and retry.
    let parsed: unknown
    try {
      parsed = JSON.parse(decoded)
    } catch {
      parsed = JSON.parse(decodeURIComponent(decoded))
    }

    if (!parsed || typeof parsed !== 'object') return null

    const {
      referrer,
      landing_url: landingUrl,
      utms,
      click_ids: clickIds,
      ts,
    } = parsed as Record<string, unknown>

    // Both URLs are mandatory; everything else is sanitized or defaulted.
    if (typeof referrer !== 'string' || typeof landingUrl !== 'string') return null

    return {
      referrer,
      landing_url: landingUrl,
      utms: toStringRecord(utms),
      click_ids: toStringRecord(clickIds),
      ts: typeof ts === 'number' && Number.isFinite(ts) ? ts : Date.now(),
    }
  } catch {
    return null
  }
}
|