Files
supabase/packages/common/first-referrer-cookie.ts
Sean Oliver 273102323d feat(growth): filter OAuth/SSO redirect referrers from attribution (#44405)
## Problem

GitHub OAuth redirects and Google SSO set the browser's Referer header
to their domain when redirecting back to supabase.com. Our attribution
pipeline treats these as genuine referral traffic, inflating the
`github` channel by ~20K orgs/week. The internal referrer fix
(GROWTH-647) surfaced this by reducing `unknown-internal` — it didn't
cause the issue, it revealed OAuth noise that was previously hidden.

## What happened

When users sign in with GitHub, the browser sends `Referer:
https://github.com/`. GitHub's login pages use
`origin-when-cross-origin` Referrer-Policy, which strips the path. So
OAuth redirects arrive as bare `github.com/` — indistinguishable from a
direct visit to github.com. Meanwhile, genuine GitHub referrals from
repos/READMEs always include the full path because those pages use
`no-referrer-when-downgrade`.

We validated against `mart_marketing_organization_attribution`: 98.5% of
GitHub-attributed orgs have bare `github.com/` as the referrer. Only
~250/week have specific paths (genuine referrals).

## Changes

- Added `isOAuthRedirectReferrer()` to `first-referrer-cookie.ts` —
identifies auth provider redirects:
  - `accounts.google.com` blocked entirely (dedicated SSO subdomain)
  - Bare `github.com/` blocked (OAuth redirect signature)
  - `github.com/<specific-path>` preserved (genuine repo/README referrals)
- Wired into `shouldRefreshCookie()` so OAuth referrers never get
stamped into cookies
- Wired into `handlePageTelemetry()` referrer overrides as
defense-in-depth
- 17 new tests covering all OAuth patterns and edge cases

## Testing

All 52 tests pass. New tests cover Google SSO (bare + with path), GitHub
bare domain (with/without trailing slash), genuine GitHub referrals
(repo, README, discussion, blob), explicit OAuth path, non-OAuth
domains, empty/malformed URLs. Verified TDD — tests failed red before
implementation, green after.

Companion dbt PR in data-engineering handles historical data.

GROWTH-732
2026-04-01 11:25:39 -07:00

406 lines
13 KiB
TypeScript

/**
* Shared utilities for the cross-app first-referrer handoff cookie.
*
* The `_sb_first_referrer` cookie is written by edge middleware on `apps/www`,
* `apps/docs`, and `apps/studio` when a user arrives from an
* external source. Studio reads it on the first telemetry pageview to recover
* external attribution context that would otherwise be lost at the app boundary.
*
* The cookie is normally write-once (365-day TTL, domain=supabase.com), but is
* refreshed when a returning visitor arrives with paid traffic signals (click IDs
* or paid UTM medium values) to ensure paid attribution overrides stale organic data.
*/
// ---------------------------------------------------------------------------
// Structural types for Next.js middleware request/response
// ---------------------------------------------------------------------------
// Using structural interfaces instead of importing NextRequest/NextResponse
// avoids version conflicts when different apps pin different Next.js versions
// (e.g. studio on Next 15, docs/www on Next 16).
/**
* Minimal structural shape of an incoming middleware request.
* Only the members this module actually reads are declared.
*/
interface MiddlewareRequest {
/** Header lookup by name; this module reads the `referer` header. */
headers: { get(name: string): string | null }
/** Cookie presence check; used to see whether the first-referrer cookie already exists. */
cookies: { has(name: string): boolean }
/** Full request URL as a string; used as the landing URL and for paid-signal detection. */
url: string
/** Parsed URL info; only `hostname` is read, to decide the cookie domain. */
nextUrl: { hostname: string }
}
/**
* Minimal structural shape of an outgoing middleware response.
* Only the cookie setter used by this module is declared.
*/
interface MiddlewareResponse {
cookies: {
/** Sets a cookie on the response with the given name, value and optional attributes. */
set(
name: string,
value: string,
options?: {
path?: string
sameSite?: 'lax' | 'strict' | 'none'
secure?: boolean
domain?: string
/** Cookie lifetime; this module passes a value expressed in seconds. */
maxAge?: number
}
): void
}
}
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/** Name of the cookie that carries the first external referrer between apps. */
export const FIRST_REFERRER_COOKIE_NAME = '_sb_first_referrer'
/**
* Short-lived (60s) diagnostic cookie written by www middleware on /dashboard and /docs paths.
* Encodes: hit=1&would_stamp={0|1}&has_cookie={0|1}
* Read by Studio telemetry to report middleware reach and attribution signals to PostHog.
*/
export const MW_DIAG_COOKIE_NAME = '_sb_mw_diag'
/** 365 days in seconds */
export const FIRST_REFERRER_COOKIE_MAX_AGE = 365 * 24 * 60 * 60
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
* Payload stored (as JSON) in the first-referrer cookie.
* Produced by `buildFirstReferrerData` and recovered by `parseFirstReferrerCookie`.
*/
export interface FirstReferrerData {
/** The external referrer URL (e.g. https://www.google.com/) */
referrer: string
/** The landing URL on our site when the external referrer was captured */
landing_url: string
/** UTM params parsed from the landing URL (e.g. utm_source, utm_medium) */
utms: Record<string, string>
/** Ad-network click IDs parsed from the landing URL */
click_ids: Record<string, string>
/** Unix timestamp (ms) when the cookie was written */
ts: number
}
// ---------------------------------------------------------------------------
// Referrer classification
// ---------------------------------------------------------------------------
/**
 * Reports whether `referrer` comes from outside Supabase's own domains.
 *
 * A referrer is internal when its hostname is exactly `supabase.com` or any
 * subdomain of it; every other parseable URL is external. Empty strings and
 * values that cannot be parsed as a URL yield `false`.
 */
export function isExternalReferrer(referrer: string): boolean {
  if (!referrer) return false

  let host: string
  try {
    host = new URL(referrer).hostname
  } catch {
    // Unparseable referrers are never treated as external traffic.
    return false
  }

  const isSupabaseHost = host === 'supabase.com' || host.endsWith('.supabase.com')
  return !isSupabaseHost
}
/**
 * Reports whether `referrer` is an OAuth/SSO redirect rather than a genuine
 * traffic source.
 *
 * Attribution should capture how a visitor discovered Supabase, not which
 * provider they authenticated with. The following referrers are classified
 * as auth redirects:
 * - any URL on `accounts.google.com` (dedicated SSO subdomain)
 * - `github.com` with the bare root path (OAuth redirects arrive without a
 *   path because of the origin-when-cross-origin Referrer-Policy)
 * - `github.com/login/oauth…` (explicit OAuth path, rare)
 *
 * Any other `github.com` path (repo, README, discussion, …) is a genuine
 * referral. Empty or unparseable input yields `false`.
 */
export function isOAuthRedirectReferrer(referrer: string): boolean {
  if (!referrer) return false

  let parsed: URL
  try {
    parsed = new URL(referrer)
  } catch {
    return false
  }

  switch (parsed.hostname) {
    case 'accounts.google.com':
      // Google SSO: everything on this host is auth traffic.
      return true
    case 'github.com':
      // Bare domain or explicit OAuth path is redirect noise; all other paths are real referrals.
      return parsed.pathname === '/' || parsed.pathname.startsWith('/login/oauth')
    default:
      return false
  }
}
// ---------------------------------------------------------------------------
// UTM + click-ID extraction
// ---------------------------------------------------------------------------
/** UTM query parameter names extracted from the landing URL. */
const UTM_KEYS = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_content', 'utm_term'] as const
/**
* Ad-network click-ID query parameter names. Extracted from the landing URL,
* and their presence alone marks a URL as carrying a paid signal.
*/
const CLICK_ID_KEYS = [
'gclid', // Google Ads
'gbraid', // Google Ads (iOS)
'wbraid', // Google Ads (iOS)
'msclkid', // Microsoft Ads (Bing)
'fbclid', // Meta (Facebook/Instagram)
'rdt_cid', // Reddit Ads
'ttclid', // TikTok Ads
'twclid', // X Ads (Twitter)
'li_fat_id', // LinkedIn Ads
] as const
/**
 * Collects the requested query parameters into a plain record.
 *
 * For each name in `keys`, the first value found in `searchParams` is copied
 * over. Parameters that are absent or have an empty value are omitted.
 */
function pickParams(
  searchParams: URLSearchParams,
  keys: readonly string[]
): Record<string, string> {
  return keys.reduce<Record<string, string>>((picked, name) => {
    const found = searchParams.get(name)
    // `get` returns null when missing; empty strings are skipped as well.
    if (found) picked[name] = found
    return picked
  }, {})
}
/**
 * Coerces an untrusted value (e.g. a field from a parsed cookie) into a
 * string-to-string record.
 *
 * Returns an empty record unless `value` is a non-null, non-array object.
 * Arrays are rejected explicitly: they are `typeof 'object'`, and would
 * otherwise be turned into a record keyed by their indices. Within an
 * accepted object, only entries whose value is a string are kept.
 */
function toStringRecord(value: unknown): Record<string, string> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) return {}

  // Object.entries always yields string keys, so only the value needs checking.
  const stringEntries = Object.entries(value).filter(
    (entry): entry is [string, string] => typeof entry[1] === 'string'
  )
  return Object.fromEntries(stringEntries)
}
// ---------------------------------------------------------------------------
// Build cookie payload from a request (edge-compatible)
// ---------------------------------------------------------------------------
/**
 * Assembles the `FirstReferrerData` cookie payload from raw request values.
 *
 * Works without `document`, so it can run inside Next.js middleware. UTM
 * parameters and click IDs are read from the query string of `landingUrl`;
 * if that URL cannot be parsed, both maps are left empty. `ts` is set to the
 * current time in milliseconds.
 */
export function buildFirstReferrerData({
  referrer,
  landingUrl,
}: {
  referrer: string
  landingUrl: string
}): FirstReferrerData {
  let query: URLSearchParams | null
  try {
    query = new URL(landingUrl).searchParams
  } catch {
    // Malformed landing URL: keep the payload but skip param extraction.
    query = null
  }

  return {
    referrer,
    landing_url: landingUrl,
    utms: query ? pickParams(query, UTM_KEYS) : {},
    click_ids: query ? pickParams(query, CLICK_ID_KEYS) : {},
    ts: Date.now(),
  }
}
// ---------------------------------------------------------------------------
// Serialize / parse
// ---------------------------------------------------------------------------
/**
 * Serializes the attribution payload to the JSON string stored in the cookie.
 *
 * The result is plain JSON with no URI-encoding applied; the cookie writer
 * is expected to encode it (Next.js `cookies.set()` does so automatically).
 */
export function serializeFirstReferrerCookie(data: FirstReferrerData): string {
  const json = JSON.stringify(data)
  return json
}
// ---------------------------------------------------------------------------
// Paid-signal detection
// ---------------------------------------------------------------------------
/**
* `utm_medium` values that count as paid traffic.
* Entries are lower-case; the URL's `utm_medium` is lower-cased before lookup.
*/
const PAID_UTM_MEDIUMS = new Set([
'cpc',
'ppc',
'paid_search',
'paidsocial',
'paid_social',
'display',
])
/**
 * Reports whether `url` carries evidence of a paid campaign.
 *
 * A URL qualifies when its query string contains any known ad-network click
 * ID parameter, or when its `utm_medium` (compared case-insensitively) is one
 * of the paid medium values. Such arrivals should override stale organic
 * attribution.
 */
export function hasPaidSignals(url: URL): boolean {
  const query = url.searchParams

  // Presence of the parameter is enough; its value is not inspected.
  if (CLICK_ID_KEYS.some((key) => query.has(key))) return true

  const medium = query.get('utm_medium')
  return medium !== null && PAID_UTM_MEDIUMS.has(medium.toLowerCase())
}
/**
 * Decides whether the first-referrer cookie should be written or rewritten.
 *
 * Without an existing cookie:
 * - an OAuth/SSO redirect referrer → do not stamp (auth ≠ discovery)
 * - an external referrer → stamp (first-visit attribution)
 * - anything else → do not stamp
 *
 * With an existing cookie:
 * - paid signals in the request URL → stamp (paid traffic refresh). The
 *   OAuth check is intentionally skipped here, because the paid signal comes
 *   from the URL (gclid, utm_medium=cpc), not from the referrer.
 * - no paid signals, or an unparseable request URL → do not stamp
 */
export function shouldRefreshCookie(
  existingCookie: boolean,
  request: { referrer: string; url: string }
): { stamp: boolean } {
  if (existingCookie) {
    let paid: boolean
    try {
      paid = hasPaidSignals(new URL(request.url))
    } catch {
      paid = false
    }
    return { stamp: paid }
  }

  const { referrer } = request
  const stamp = !isOAuthRedirectReferrer(referrer) && isExternalReferrer(referrer)
  return { stamp }
}
// ---------------------------------------------------------------------------
// Middleware helper — shared across apps/www, apps/docs, and apps/studio
// ---------------------------------------------------------------------------
/**
 * Writes the first-referrer cookie onto a Next.js middleware response when
 * the incoming request qualifies. This is the single entry point for app
 * middleware: pass it the incoming request and the outgoing response.
 *
 * The referrer is taken from the `referer` request header (empty string when
 * absent), and the request URL is used as the landing URL.
 *
 * On `supabase.com` and its subdomains the cookie is set with
 * `domain=supabase.com` and `secure`, so it is readable across subdomains
 * (www, docs, studio). On any other host (localhost, preview deploys) the
 * domain is left unset, so the browser stores a host-only cookie instead of
 * rejecting an invalid domain.
 */
export function stampFirstReferrerCookie(
  request: MiddlewareRequest,
  response: MiddlewareResponse
): void {
  const referrer = request.headers.get('referer') ?? ''
  const alreadyStamped = request.cookies.has(FIRST_REFERRER_COOKIE_NAME)
  const decision = shouldRefreshCookie(alreadyStamped, { referrer, url: request.url })
  if (!decision.stamp) return

  const payload = serializeFirstReferrerCookie(
    buildFirstReferrerData({ referrer, landingUrl: request.url })
  )

  const host = request.nextUrl.hostname
  const onSupabaseDomain = host === 'supabase.com' || host.endsWith('.supabase.com')
  const crossSubdomainOptions = onSupabaseDomain ? { domain: 'supabase.com', secure: true } : {}

  response.cookies.set(FIRST_REFERRER_COOKIE_NAME, payload, {
    path: '/',
    sameSite: 'lax',
    ...crossSubdomainOptions,
    maxAge: FIRST_REFERRER_COOKIE_MAX_AGE,
  })
}
// ---------------------------------------------------------------------------
// Middleware diagnostic cookie — parse (client-side)
// ---------------------------------------------------------------------------
/** Decoded contents of the middleware diagnostic cookie. */
export interface MwDiagData {
/** Whether the cookie recorded a middleware hit (`hit=1`). */
hit: boolean
/** True when the cookie carries `would_stamp=1`. */
would_stamp: boolean
/** True when the cookie carries `has_cookie=1`. */
has_existing_cookie: boolean
}
/**
 * Extracts the short-lived middleware diagnostic cookie from a cookie header
 * string (`name=value; name=value` form).
 *
 * The first cookie with the diagnostic name is URI-decoded and read as a
 * query string. Returns null when that cookie is absent, when it does not
 * contain `hit=1`, or when decoding fails.
 */
export function parseMwDiagCookie(cookieHeader: string): MwDiagData | null {
  const prefix = `${MW_DIAG_COOKIE_NAME}=`
  try {
    for (const part of cookieHeader.split(';')) {
      const pair = part.trim()
      if (!pair.startsWith(prefix)) continue

      // Only the first matching cookie is considered.
      const fields = new URLSearchParams(decodeURIComponent(pair.slice(prefix.length)))
      if (fields.get('hit') !== '1') return null
      return {
        hit: true,
        would_stamp: fields.get('would_stamp') === '1',
        has_existing_cookie: fields.get('has_cookie') === '1',
      }
    }
    return null
  } catch {
    return null
  }
}
// ---------------------------------------------------------------------------
// Parse cookie from document.cookie header (client-side)
// ---------------------------------------------------------------------------
/**
 * Recovers the first-referrer payload from a cookie header string
 * (`name=value; name=value` form, e.g. `document.cookie`).
 *
 * Returns null when the cookie is absent, cannot be decoded or parsed, is
 * not a JSON object, or lacks string `referrer` / `landing_url` fields.
 * `utms` and `click_ids` are reduced to their string-valued entries, and a
 * missing or non-finite `ts` falls back to the current time.
 */
export function parseFirstReferrerCookie(cookieHeader: string): FirstReferrerData | null {
  const prefix = `${FIRST_REFERRER_COOKIE_NAME}=`
  try {
    const entry = cookieHeader
      .split(';')
      .map((piece) => piece.trim())
      .find((piece) => piece.startsWith(prefix))
    if (entry === undefined) return null

    const decodedOnce = decodeURIComponent(entry.slice(prefix.length))

    // Handle double-encoded cookies from before the serializer fix.
    // Next.js cookies.set() encodes automatically, but serializeFirstReferrerCookie
    // previously called encodeURIComponent too, producing double-encoded values.
    // If the singly-decoded text is not JSON, decode one more time.
    let payload: unknown
    try {
      payload = JSON.parse(decodedOnce)
    } catch {
      payload = JSON.parse(decodeURIComponent(decodedOnce))
    }
    if (payload === null || typeof payload !== 'object') return null

    const {
      referrer,
      landing_url: landingUrl,
      utms,
      click_ids: clickIds,
      ts,
    } = payload as Record<string, unknown>
    if (typeof referrer !== 'string' || typeof landingUrl !== 'string') return null

    return {
      referrer,
      landing_url: landingUrl,
      utms: toStringRecord(utms),
      click_ids: toStringRecord(clickIds),
      ts: typeof ts === 'number' && Number.isFinite(ts) ? ts : Date.now(),
    }
  } catch {
    return null
  }
}