mirror of
https://github.com/nearai/ironclaw.git
synced 2026-06-08 19:04:10 +08:00
* Add Reborn process sandbox backend * Refactor process sandbox module boundaries * Tighten process sandbox contracts * Simplify process sandbox routing * Harden process sandbox review findings * Cover process sandbox service routing * Close process sandbox review gaps * Harden remaining process sandbox review paths * fix(process-sandbox): address henrypark133 review - harden sandbox coverage (#4072) * wire process sandbox spawn approvals
530 lines
26 KiB
Bash
Executable File
530 lines
26 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Pre-commit safety checks for common issues caught by AI code reviewers.
|
|
#
|
|
# Can be run standalone: bash scripts/pre-commit-safety.sh
|
|
# Or installed as a git pre-commit hook via dev-setup.sh.
|
|
#
|
|
# Checks staged .rs files for:
|
|
# 1. Unsafe UTF-8 byte slicing (panics on multi-byte chars)
|
|
# 2. Case-sensitive file extension comparisons
|
|
# 3. Hardcoded /tmp paths in tests (flaky in parallel runs)
|
|
# 4. Tool parameters logged without redaction (secret leaks)
|
|
# 5. Multi-step DB operations without transaction wrapping
|
|
# 6. .unwrap(), .expect(), assert!() in production code (panics)
|
|
# 7. Gateway/CLI handlers bypassing ToolDispatcher (must go through tools)
|
|
# 8. CredentialName referenced in web-layer code (wrong identity at boundary)
|
|
# 9. SSE broadcast emitted outside the engine→AppEvent projection bridge
|
|
# 10. Newly-added pub fn/type/struct/enum/trait with zero callers (dead/speculative API)
|
|
#
|
|
# Also runs check-i18n-parity.sh when crates/ironclaw_gateway/static/i18n/*.js
|
|
# files are staged, to ensure every language pack has the same key set.
|
|
#
|
|
# Suppress individual lines with an inline "// safety: <reason>" comment.
|
|
# For check #7, use "// dispatch-exempt: <reason>" instead.
|
|
# For check #8, use "// web-identity-exempt: <reason>" instead.
|
|
# For check #9, use "// projection-exempt: <category>, <detail>" instead.
|
|
# For check #10, use "// pub-api-exempt: <reason>" instead.
|
|
|
|
set -euo pipefail
|
|
|
|
TEST_BOUNDARIES_FILE=""
|
|
GATEWAY_APP_JS_TMP=""
|
|
|
|
# Determine a suitable base ref for standalone diffs.
|
|
resolve_base_ref() {
|
|
local candidates=(
|
|
"@{upstream}"
|
|
"origin/HEAD"
|
|
"origin/main"
|
|
"origin/master"
|
|
"main"
|
|
"master"
|
|
)
|
|
|
|
for ref in "${candidates[@]}"; do
|
|
if git rev-parse --verify --quiet "$ref" >/dev/null 2>&1; then
|
|
echo "$ref"
|
|
return 0
|
|
fi
|
|
done
|
|
|
|
echo "pre-commit-safety: could not determine a base Git ref for diff (tried: ${candidates[*]})." >&2
|
|
echo "pre-commit-safety: ensure your repository has an upstream or a local main/master branch." >&2
|
|
exit 1
|
|
}
|
|
|
|
# i18n parity: when any language pack changes, all languages must stay in sync.
|
|
# Run before the .rs-focused checks so it fires even when no .rs files change.
|
|
if git diff --cached --quiet 2>/dev/null; then
|
|
HAS_STAGED_CHANGES=0
|
|
I18N_CHANGED=$(git diff --name-only -- 'crates/ironclaw_gateway/static/i18n/*.js' 2>/dev/null || true)
|
|
GATEWAY_APP_JS_CHANGED=$(git diff --name-only -- 'crates/ironclaw_gateway/static/js/' 2>/dev/null || true)
|
|
else
|
|
HAS_STAGED_CHANGES=1
|
|
I18N_CHANGED=$(git diff --cached --name-only -- 'crates/ironclaw_gateway/static/i18n/*.js' 2>/dev/null || true)
|
|
GATEWAY_APP_JS_CHANGED=$(git diff --cached --name-only -- 'crates/ironclaw_gateway/static/js/' 2>/dev/null || true)
|
|
fi
|
|
if [ -n "$I18N_CHANGED" ]; then
|
|
# Resolve script location even when invoked via a symlink (the
|
|
# pre-commit hook is typically a symlink at .git/hooks/pre-commit
|
|
# pointing to this file in scripts/). Walk symlinks until we find the
|
|
# real path, then use its parent directory.
|
|
SOURCE="${BASH_SOURCE[0]:-$0}"
|
|
while [ -L "$SOURCE" ]; do
|
|
LINK_TARGET="$(readlink "$SOURCE")"
|
|
case "$LINK_TARGET" in
|
|
/*) SOURCE="$LINK_TARGET" ;;
|
|
*) SOURCE="$(cd "$(dirname "$SOURCE")" && pwd)/$LINK_TARGET" ;;
|
|
esac
|
|
done
|
|
SCRIPT_DIR="$(cd "$(dirname "$SOURCE")" && pwd)"
|
|
if ! "$SCRIPT_DIR/check-i18n-parity.sh"; then
|
|
echo ""
|
|
echo "Commit blocked: i18n parity check failed."
|
|
echo "Every key added to en.js must also be added to all other language files (zh-CN.js, ko.js, ...)."
|
|
echo "Placeholder tokens like {name} must match across all languages."
|
|
echo "To bypass: git commit --no-verify"
|
|
exit 1
|
|
fi
|
|
fi
|
|
|
|
# Gateway frontend JS must parse cleanly; a syntax error in any split
|
|
# module leaves the auth shell visible and prevents bootstrap from running.
|
|
# The monolithic `app.js` was split into per-surface/per-concern modules
|
|
# under `static/js/` that are concatenated at compile time into a single
|
|
# `APP_JS` constant (see `crates/ironclaw_gateway/src/assets.rs`). Cuts
|
|
# land on top-level symbol boundaries, so each file is self-parseable —
|
|
# a per-file `node --check` is sufficient.
|
|
if [ -n "$GATEWAY_APP_JS_CHANGED" ]; then
|
|
if ! command -v node >/dev/null 2>&1; then
|
|
echo ""
|
|
echo "Commit blocked: Node.js is required to validate gateway JS syntax."
|
|
echo "Install Node.js and rerun the commit, or bypass with git commit --no-verify"
|
|
exit 1
|
|
fi
|
|
|
|
GATEWAY_JS_TMP=$(mktemp "${TMPDIR:-/tmp}/gateway-js.XXXXXX.js")
|
|
trap 'rm -f "${TEST_BOUNDARIES_FILE:-}" "${GATEWAY_JS_TMP:-}"' EXIT
|
|
FAILED=0
|
|
while IFS= read -r f; do
|
|
[ -z "$f" ] && continue
|
|
# Only validate files still present (skip deletes).
|
|
if [ "$HAS_STAGED_CHANGES" -eq 1 ]; then
|
|
git show ":$f" > "$GATEWAY_JS_TMP" 2>/dev/null || continue
|
|
else
|
|
[ -f "$f" ] || continue
|
|
cp "$f" "$GATEWAY_JS_TMP"
|
|
fi
|
|
if ! node --check "$GATEWAY_JS_TMP" >/dev/null 2>&1; then
|
|
echo " ✗ syntax error: $f"
|
|
FAILED=1
|
|
fi
|
|
done <<<"$GATEWAY_APP_JS_CHANGED"
|
|
|
|
if [ "$FAILED" -eq 1 ]; then
|
|
echo ""
|
|
echo "Commit blocked: one or more gateway JS modules failed syntax validation."
|
|
echo "Fix the parse error(s) above or bypass with git commit --no-verify"
|
|
exit 1
|
|
fi
|
|
fi
|
|
|
|
# Support both pre-commit hook (staged files) and standalone (all changed vs base)
|
|
if git diff --cached --quiet 2>/dev/null; then
|
|
# No staged changes -- compare working tree against a resolved base ref
|
|
BASE_REF="$(resolve_base_ref)"
|
|
DIFF_OUTPUT=$(git diff "$BASE_REF" -- '*.rs' 2>/dev/null || true)
|
|
CHANGED_FILES=$(git diff --name-only "$BASE_REF" -- '*.rs' 2>/dev/null || true)
|
|
else
|
|
DIFF_OUTPUT=$(git diff --cached -U0 -- '*.rs' 2>/dev/null || true)
|
|
CHANGED_FILES=$(git diff --cached --name-only --diff-filter=AM -- '*.rs' 2>/dev/null || true)
|
|
fi
|
|
|
|
# Early exit if there are no relevant .rs changes
|
|
if [ -z "$DIFF_OUTPUT" ]; then
|
|
exit 0
|
|
fi
|
|
|
|
# Build a "test mod start line" lookup per changed file by scanning the actual
|
|
# file content. The previous heuristic relied on `mod tests` appearing in the
|
|
# git diff hunk header (`@@ ... mod tests @@`), but that context only shows up
|
|
# for tiny edits inside a known test fn — and never for brand-new files added
|
|
# in a merge. Reading the file directly catches both cases.
|
|
TEST_BOUNDARIES_FILE=$(mktemp)
|
|
trap 'rm -f "${TEST_BOUNDARIES_FILE:-}" "${GATEWAY_APP_JS_TMP:-}"' EXIT
|
|
for f in $CHANGED_FILES; do
|
|
[ -f "$f" ] || continue
|
|
test_line=$(awk '
|
|
/^#\[cfg\(test\)\]/ { cfg_at = NR; next }
|
|
cfg_at && /^mod tests([[:space:]]*\{)?[[:space:]]*$/ { print NR; exit }
|
|
cfg_at && NF > 0 && !/^[[:space:]]*$/ { cfg_at = 0 }
|
|
' "$f")
|
|
if [ -n "$test_line" ]; then
|
|
printf '%s\t%s\n' "$f" "$test_line" >> "$TEST_BOUNDARIES_FILE"
|
|
fi
|
|
done
|
|
|
|
# Filter a unified diff (DIFF_OUTPUT-shaped) to drop `+` lines that come from
|
|
# test code: either inside a `#[cfg(test)] mod tests` block (per the
|
|
# precomputed boundaries) or in any file under the top-level `tests/` dir.
|
|
# Marker, header, and context lines pass through untouched so downstream
|
|
# grep/awk pipelines still see file/hunk anchors.
|
|
strip_test_mod_lines() {
|
|
awk -v boundaries="$TEST_BOUNDARIES_FILE" '
|
|
BEGIN {
|
|
while ((getline line < boundaries) > 0) {
|
|
idx = index(line, "\t")
|
|
if (idx) {
|
|
f = substr(line, 1, idx - 1)
|
|
test_start[f] = substr(line, idx + 1) + 0
|
|
}
|
|
}
|
|
close(boundaries)
|
|
cur_file = ""
|
|
cur_start = 0
|
|
cur_skip_all = 0
|
|
new_line = 0
|
|
}
|
|
/^\+\+\+ b\// {
|
|
cur_file = substr($0, 7)
|
|
cur_start = (cur_file in test_start) ? test_start[cur_file] : 0
|
|
cur_skip_all = (cur_file ~ /(^|\/)tests\// || cur_file ~ /(^|\/)tests\.rs$/)
|
|
new_line = 0
|
|
print
|
|
next
|
|
}
|
|
/^\+\+\+ / {
|
|
cur_file = ""; cur_start = 0; cur_skip_all = 0; new_line = 0
|
|
print
|
|
next
|
|
}
|
|
/^--- / { print; next }
|
|
/^@@ / {
|
|
# Parse new-file starting line from `@@ -X[,Y] +U[,V] @@`
|
|
n = split($0, parts, " ")
|
|
for (i = 1; i <= n; i++) {
|
|
if (substr(parts[i], 1, 1) == "+") {
|
|
range = substr(parts[i], 2)
|
|
c = index(range, ",")
|
|
if (c) range = substr(range, 1, c - 1)
|
|
new_line = range + 0 - 1
|
|
break
|
|
}
|
|
}
|
|
print
|
|
next
|
|
}
|
|
/^\+/ {
|
|
new_line++
|
|
if (cur_skip_all) next
|
|
if (cur_start > 0 && new_line >= cur_start) next
|
|
print
|
|
next
|
|
}
|
|
/^-/ { print; next }
|
|
{ new_line++; print }
|
|
'
|
|
}
|
|
|
|
DIFF_OUTPUT_NO_TESTS=$(printf '%s\n' "$DIFF_OUTPUT" | strip_test_mod_lines)
|
|
|
|
WARNINGS=0
|
|
|
|
warn() {
|
|
if [ "$WARNINGS" -eq 0 ]; then
|
|
echo ""
|
|
echo "=== Pre-commit Safety Checks ==="
|
|
echo ""
|
|
fi
|
|
WARNINGS=$((WARNINGS + 1))
|
|
echo " [$1] $2"
|
|
}
|
|
|
|
# 1. Unsafe UTF-8 byte slicing: &s[..N] or &s[..some_var] on strings
|
|
# Safe patterns: is_char_boundary, char_indices, // safety:
|
|
if echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+' | grep -E '\[\.\..*\]' | grep -vE 'is_char_boundary|char_indices|// safety:|as_bytes|Vec<|&\[u8\]|\[u8\]|bytes\(\)|&bytes' | head -3 | grep -q .; then
|
|
warn "UTF8" "Possible unsafe byte-index string slicing. Use is_char_boundary() or char_indices()."
|
|
echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+' | grep -E '\[\.\..*\]' | grep -vE 'is_char_boundary|char_indices|// safety:|as_bytes|Vec<|&\[u8\]|\[u8\]|bytes\(\)|&bytes' | head -3 | sed 's/^/ /'
|
|
fi
|
|
|
|
# 2. Case-sensitive file extension checks
|
|
# Match: .ends_with(".png") without prior to_lowercase
|
|
if echo "$DIFF_OUTPUT" | grep -nE '^\+.*ends_with\("\.([pP][nN][gG]|[jJ][pP][eE]?[gG]|[gG][iI][fF]|[wW][eE][bB][pP]|[mM][dD])"\)' | grep -vE 'to_lowercase|to_ascii_lowercase|// safety:' | head -3 | grep -q .; then
|
|
warn "CASE" "Case-sensitive file extension comparison. Normalize to lowercase first."
|
|
echo "$DIFF_OUTPUT" | grep -nE '^\+.*ends_with\("\.([pP][nN][gG]|[jJ][pP][eE]?[gG]|[gG][iI][fF]|[wW][eE][bB][pP]|[mM][dD])"\)' | grep -vE 'to_lowercase|to_ascii_lowercase|// safety:' | head -3 | sed 's/^/ /'
|
|
fi
|
|
|
|
# 3. Hardcoded /tmp paths in test files
|
|
if echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+.*"/tmp/' | grep -vE 'tempfile|tempdir|// safety:' | head -3 | grep -q .; then
|
|
warn "TMPDIR" "Hardcoded /tmp path. Use tempfile::tempdir() for parallel-safe tests."
|
|
echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+.*"/tmp/' | grep -vE 'tempfile|tempdir|// safety:' | head -3 | sed 's/^/ /'
|
|
fi
|
|
|
|
# 4. Logging tool parameters without redaction
|
|
if echo "$DIFF_OUTPUT" | grep -nE '^\+.*tracing::(info|debug|warn|error).*param' | grep -vE 'redact|// safety:' | head -3 | grep -q .; then
|
|
warn "REDACT" "Logging tool parameters without redaction. Use redact_params() first."
|
|
echo "$DIFF_OUTPUT" | grep -nE '^\+.*tracing::(info|debug|warn|error).*param' | grep -vE 'redact|// safety:' | head -3 | sed 's/^/ /'
|
|
fi
|
|
|
|
# 5. Multi-step DB operations without transaction
|
|
# Uses -W (function context) to reduce false positives from existing transactions.
|
|
# Suppressible with "// safety:" in the hunk.
|
|
DIFF_W_OUTPUT=$(git diff --cached -W -- '*.rs' 2>/dev/null || git diff "$(resolve_base_ref)" -W -- '*.rs' 2>/dev/null || true)
|
|
if [ -n "$DIFF_W_OUTPUT" ]; then
|
|
HUNK_COUNT=$(echo "$DIFF_W_OUTPUT" | awk '
|
|
/^@@/ {
|
|
if (count >= 2 && !has_tx && !has_safety) found++
|
|
count=0; has_tx=0; has_safety=0
|
|
}
|
|
/^\+.*\.(execute|query)\(/ { count++ }
|
|
/^\+.*(transaction|\.tx\.|\.begin\()/ { has_tx=1 }
|
|
/ .*(transaction|\.tx\.|\.begin\()/ { has_tx=1 }
|
|
/\/\/ safety:/ { has_safety=1 }
|
|
END {
|
|
if (count >= 2 && !has_tx && !has_safety) found++
|
|
print found+0
|
|
}
|
|
')
|
|
if [ "$HUNK_COUNT" -gt 0 ]; then
|
|
warn "TX" "Multiple DB operations in same function without transaction. Wrap in a transaction for atomicity."
|
|
echo "$DIFF_W_OUTPUT" | awk '
|
|
/^@@/ {
|
|
if (count >= 2 && !has_tx && !has_safety) { print buf }
|
|
buf=""; count=0; has_tx=0; has_safety=0
|
|
}
|
|
/^\+.*\.(execute|query)\(/ { count++ }
|
|
/^\+.*(transaction|\.tx\.|\.begin\()/ { has_tx=1 }
|
|
/ .*(transaction|\.tx\.|\.begin\()/ { has_tx=1 }
|
|
/\/\/ safety:/ { has_safety=1 }
|
|
{ buf = buf "\n" $0 }
|
|
END {
|
|
if (count >= 2 && !has_tx && !has_safety) { print buf }
|
|
}
|
|
' | grep -E '^\+.*\.(execute|query)\(' | head -4 | sed 's/^/ /'
|
|
fi
|
|
fi
|
|
|
|
# 6. .unwrap(), .expect(), assert!() in production code
|
|
# Matches added lines containing panic-inducing calls.
|
|
# Excludes test files, test modules, and debug_assert (compiled out in release).
|
|
# Suppress with "// safety: <reason>".
|
|
PROD_DIFF="$DIFF_OUTPUT_NO_TESTS"
|
|
# Strip hunks from test-only files (tests/ directory, *_test.rs, test_*.rs)
|
|
PROD_DIFF=$(echo "$PROD_DIFF" | grep -v '^+++ b/tests/' || true)
|
|
# Strip hunks whose @@ context line indicates a test module.
|
|
# git diff includes the enclosing function/module name after @@.
|
|
# Only match `mod tests` (the conventional #[cfg(test)] module) — do NOT
|
|
# match `fn test_*` because production code can have functions named test_*.
|
|
PROD_DIFF=$(echo "$PROD_DIFF" | awk '
|
|
/^@@ / { in_test = ($0 ~ /mod tests/) }
|
|
!in_test { print }
|
|
' || true)
|
|
if echo "$PROD_DIFF" | grep -nE '^\+' \
|
|
| grep -E '\.(unwrap|expect)\(|[^_]assert(_eq|_ne)?!' \
|
|
| grep -vE 'debug_assert|// safety:|#\[cfg\(test\)\]|#\[test\]|mod tests' \
|
|
| head -5 | grep -q .; then
|
|
warn "PANIC" "Production code must not use .unwrap(), .expect(), or assert!(). Use proper error handling."
|
|
echo "$PROD_DIFF" | grep -nE '^\+' \
|
|
| grep -E '\.(unwrap|expect)\(|[^_]assert(_eq|_ne)?!' \
|
|
| grep -vE 'debug_assert|// safety:|#\[cfg\(test\)\]|#\[test\]|mod tests' \
|
|
| head -5 | sed 's/^/ /'
|
|
fi
|
|
|
|
# 7. Gateway/CLI handlers bypassing ToolDispatcher.
|
|
# Channel handlers and CLI commands must route mutations through
|
|
# `ToolDispatcher::dispatch()` so every UI/CLI-initiated action gets the
|
|
# same audit trail and safety pipeline as agent-initiated tool calls.
|
|
# See `.claude/rules/tools.md` "Everything Goes Through Tools".
|
|
#
|
|
# The check looks at .rs files under src/channels/web/handlers/ and
|
|
# src/cli/ for newly-added lines that touch direct manager fields on the
|
|
# gateway state. Suppress with "// dispatch-exempt: <reason>".
|
|
DISPATCH_DIFF=$(git diff --cached -U0 -- 'src/channels/web/handlers/*.rs' 'src/cli/*.rs' 2>/dev/null || true)
|
|
if [ -z "$DISPATCH_DIFF" ]; then
|
|
DISPATCH_DIFF=$(git diff "$(resolve_base_ref)" -U0 -- 'src/channels/web/handlers/*.rs' 'src/cli/*.rs' 2>/dev/null || true)
|
|
fi
|
|
if [ -n "$DISPATCH_DIFF" ]; then
|
|
DISPATCH_HITS=$(echo "$DISPATCH_DIFF" | grep -nE '^\+' \
|
|
| grep -E 'state\.(store|workspace|workspace_pool|extension_manager|skill_registry|session_manager)\.' \
|
|
| grep -vE '// dispatch-exempt:|// safety:|:\+\+\+ ' \
|
|
| head -5 || true)
|
|
if [ -n "$DISPATCH_HITS" ]; then
|
|
warn "DISPATCH" "Handler directly touches state.{store,workspace,extension_manager,skill_registry,session_manager}. Route through ToolDispatcher::dispatch() instead. See .claude/rules/tools.md."
|
|
echo "$DISPATCH_HITS" | sed 's/^/ /'
|
|
fi
|
|
fi
|
|
|
|
# 8. CredentialName referenced in web-layer code.
|
|
# CredentialName is a backend/secrets-store identity. Web routes and
|
|
# web DTOs take ExtensionName; the dispatcher and auth_manager resolve
|
|
# credential identity from the extension name server-side. An explicit
|
|
# `CredentialName` reference in src/channels/web/** (except inside
|
|
# `#[cfg(test)] mod tests` blocks) means the wrong identity is reaching
|
|
# the web boundary. See src/channels/web/CLAUDE.md "Identity types at
|
|
# the web boundary" and .claude/rules/types.md.
|
|
#
|
|
# Suppress with "// web-identity-exempt: <reason>" when the reference
|
|
# is genuinely reading an already-typed value off a backend struct
|
|
# (e.g., destructuring `ResumeKind::Authentication` to log the name).
|
|
WEB_IDENTITY_DIFF=$(git diff --cached -U0 -- 'src/channels/web/*.rs' 'src/channels/web/**/*.rs' 2>/dev/null || true)
|
|
if [ -z "$WEB_IDENTITY_DIFF" ]; then
|
|
WEB_IDENTITY_DIFF=$(git diff "$(resolve_base_ref)" -U0 -- 'src/channels/web/*.rs' 'src/channels/web/**/*.rs' 2>/dev/null || true)
|
|
fi
|
|
if [ -n "$WEB_IDENTITY_DIFF" ]; then
|
|
# Strip lines inside `#[cfg(test)] mod tests` blocks using the same
|
|
# precomputed boundaries used for other prod-only checks.
|
|
WEB_IDENTITY_PROD=$(printf '%s\n' "$WEB_IDENTITY_DIFF" | strip_test_mod_lines)
|
|
# `(^|[^[:alnum:]_])CredentialName([^[:alnum:]_]|$)` is a portable
|
|
# word boundary; `grep -E`'s `\b` is a GNU extension and is not
|
|
# recognised by BSD grep.
|
|
WEB_IDENTITY_HITS=$(echo "$WEB_IDENTITY_PROD" | grep -nE '^\+' \
|
|
| grep -E '(^|[^[:alnum:]_])CredentialName([^[:alnum:]_]|$)' \
|
|
| grep -vE '// web-identity-exempt:|// safety:|:\+\+\+ ' \
|
|
| head -5 || true)
|
|
if [ -n "$WEB_IDENTITY_HITS" ]; then
|
|
warn "CREDNAME" "\`CredentialName\` referenced in src/channels/web/** — web code takes \`ExtensionName\`; credential identity stays backend-side. Push the mapping into bridge::auth_manager or annotate with '// web-identity-exempt: <reason>'."
|
|
echo "$WEB_IDENTITY_HITS" | sed 's/^/ /'
|
|
fi
|
|
fi
|
|
|
|
# 9. SSE `AppEvent` broadcast outside the engine→AppEvent projection bridge.
|
|
# Every `AppEvent` that hits the SSE/WS stream should project from a typed
|
|
# source log (`ironclaw_engine::EventKind`, `JobEvent`, channel-lifecycle)
|
|
# or belong to the documented transport-only allowlist. Direct
|
|
# `sse.broadcast(...)` / `sse.broadcast_for_user(...)` calls from tools,
|
|
# handlers, or extension managers drift the UI stream out of alignment
|
|
# with the replayable source, which is the root cause of the state
|
|
# desync class tracked by #2792. See `.claude/rules/gateway-events.md`.
|
|
#
|
|
# Annotation format: `// projection-exempt: <category>, <detail>` — the
|
|
# category names the source log (`bridge dispatcher`, `channel-lifecycle`,
|
|
# `sandbox JobEvent`, `legacy v1 auth`) or the transport-only allowlist
|
|
# (`transport-only, heartbeat`). The comma is required — an unnamed
|
|
# `// projection-exempt: legacy` does not suppress the check.
|
|
#
|
|
# Two match patterns:
|
|
# 1. `*.broadcast_for_user(...)` on any receiver — `broadcast_for_user`
|
|
# is unique to `SseManager` (see `src/channels/web/platform/sse.rs`),
|
|
# so matching the method name alone catches both same-line
|
|
# receivers (`state.sse.broadcast_for_user(...)`) and rustfmt
|
|
# wraps (`state\n .sse\n .broadcast_for_user(...)`) without
|
|
# risk of false positives from other types.
|
|
# 2. `<word-boundary>sse.broadcast(...)` — the single-name `.broadcast(`
|
|
# on its own line can be the `Channel` trait method, so this arm
|
|
# is deliberately narrower and anchors on an `sse` receiver.
|
|
# `(^|[^[:alnum:]_])sse\.` is a portable boundary; `grep -E`'s
|
|
# `\b` is a GNU extension and is not recognised by BSD grep, so
|
|
# we avoid it here.
|
|
# Suppression regex requires a non-empty detail after the comma:
|
|
# `[^,]+,[[:space:]]*[^[:space:]]` — `// projection-exempt: foo,`
|
|
# (empty detail) does NOT exempt; `// projection-exempt: foo, bar`
|
|
# does. This matches the documented contract in
|
|
# `.claude/rules/gateway-events.md`.
|
|
PROJECTION_HITS=$(echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+' \
|
|
| grep -E '(\.broadcast_for_user|(^|[^[:alnum:]_])sse\.broadcast)[[:space:]]*\(' \
|
|
| grep -vE '// projection-exempt: [^,]+,[[:space:]]*[^[:space:]]|// safety:|:\+\+\+ ' \
|
|
| head -5 || true)
|
|
if [ -n "$PROJECTION_HITS" ]; then
|
|
warn "PROJECTION" "Direct SSE broadcast outside the engine→AppEvent bridge. Route through \`thread_event_to_app_events\` in \`src/bridge/router.rs\` (project from a typed source log) or annotate with '// projection-exempt: <category>, <detail>'. See .claude/rules/gateway-events.md."
|
|
echo "$PROJECTION_HITS" | sed 's/^/ /'
|
|
fi
|
|
|
|
# 10. Dead/speculative public API: newly-added `pub fn`/`pub type`/`pub struct`/
|
|
# `pub enum`/`pub trait` whose identifier appears exactly once in the whole
|
|
# repo — i.e. only the definition, no callers/uses. This is the "Theme 1"
|
|
# maintainability anti-pattern: accessors, `_dyn` variants, and getters that
|
|
# expand the stable surface with zero consumers.
|
|
#
|
|
# Heuristic, not a proof: a brand-new public symbol with a single repo-wide
|
|
# occurrence is almost always dead-on-arrival. It's a WARNING (the script
|
|
# only hard-blocks on the documented categories above via the shared
|
|
# summary, but this check is advisory by design — see note below).
|
|
#
|
|
# Only added lines are scanned (`DIFF_OUTPUT_NO_TESTS` already drops test
|
|
# modules and `tests/` files), so it never trips on pre-existing code or
|
|
# on test-only helpers. Suppress an intentional new-but-uncalled symbol
|
|
# (e.g. a trait method implemented elsewhere, an FFI/`#[no_mangle]` export,
|
|
# or API staged ahead of its first caller in the same series) with
|
|
# "// pub-api-exempt: <reason>" on the declaration line.
|
|
PUB_API_HITS=""
|
|
# Pull added pub-item declaration lines, excluding those already exempted.
|
|
PUB_DECL_LINES=$(echo "$DIFF_OUTPUT_NO_TESTS" | grep -E '^\+' \
|
|
| grep -E '^\+[[:space:]]*pub(\([^)]*\))?[[:space:]]+(async[[:space:]]+)?(unsafe[[:space:]]+)?(fn|type|struct|enum|trait)[[:space:]]' \
|
|
| grep -vE '// pub-api-exempt:|// safety:' || true)
|
|
if [ -n "$PUB_DECL_LINES" ]; then
|
|
while IFS= read -r line; do
|
|
[ -z "$line" ] && continue
|
|
# Extract the declared identifier: strip leading `+`, the `pub[(...)]`
|
|
# qualifier and modifier keywords, the item keyword, then take the
|
|
# first identifier token (cutting at `<`, `(`, `{`, `:`, whitespace).
|
|
ident=$(printf '%s\n' "$line" | sed -E \
|
|
-e 's/^\+[[:space:]]*//' \
|
|
-e 's/^pub(\([^)]*\))?[[:space:]]+//' \
|
|
-e 's/^(async[[:space:]]+)?(unsafe[[:space:]]+)?//' \
|
|
-e 's/^(fn|type|struct|enum|trait)[[:space:]]+//' \
|
|
| grep -oE '^[A-Za-z_][A-Za-z0-9_]*' || true)
|
|
[ -z "$ident" ] && continue
|
|
# Count repo-wide occurrences of the identifier as a whole word.
|
|
# `git grep -w` is fast and respects .gitignore; one hit == definition only.
|
|
occ=$(git grep -wIE "$ident" -- '*.rs' 2>/dev/null | wc -l | tr -d ' ')
|
|
if [ "${occ:-0}" -le 1 ]; then
|
|
PUB_API_HITS="${PUB_API_HITS} ${ident} (${occ:-0} occurrence) — ${line#+}
|
|
"
|
|
fi
|
|
done <<<"$PUB_DECL_LINES"
|
|
fi
|
|
if [ -n "$PUB_API_HITS" ]; then
|
|
warn "PUBAPI" "New \`pub\` item(s) with zero callers/uses (dead or speculative public API — maintainability audit Theme 1). Make them \`pub(crate)\`, delete, or annotate with '// pub-api-exempt: <reason>'."
|
|
printf '%s' "$PUB_API_HITS"
|
|
fi
|
|
|
|
# 10. Cross-tenant safety: an UNSCOPED `sse.broadcast(...)` call (i.e. not
|
|
# `broadcast_for_user`) delivers the event to every connected
|
|
# subscriber, regardless of which user owns the underlying state.
|
|
# In multi-tenant deployments that pattern leaks tool calls, log
|
|
# lines, and onboarding state across tenants — see the cross-tenant
|
|
# thread visibility incident and the `dispatch_status_event` fix.
|
|
#
|
|
# A new `sse.broadcast(...)` line is acceptable only if it is one
|
|
# of:
|
|
# (a) Transport-only (heartbeat / stream_chunk) — the canonical
|
|
# projection-exempt category that already documents the
|
|
# empty-payload allowlist.
|
|
# (b) Annotated with `// multi-tenant-safe: <reason>` on the same
|
|
# line, naming the structural reason the event cannot leak
|
|
# tenant-bound state (e.g. "single-tenant fallback inside an
|
|
# explicit multi_tenant_mode=false branch").
|
|
# Otherwise prefer `broadcast_for_user(uid, ...)` with a known
|
|
# `user_id` derived from the source-log payload.
|
|
#
|
|
# This check runs only on `src/**` and `crates/**` — `tests/**` and
|
|
# `#[cfg(test)]` blocks were already filtered out upstream.
|
|
# Marker matching: `//.*multi-tenant-safe: <non-whitespace>` — the
|
|
# `//.*` prefix anchors the marker to a Rust comment but allows
|
|
# additional annotations on the same line (e.g. when `// projection-
|
|
# exempt: ...; multi-tenant-safe: ...` carry both markers in one
|
|
# trailing comment because Rust line comments cannot be nested).
|
|
MT_BROADCAST_HITS=$(echo "$DIFF_OUTPUT_NO_TESTS" | grep -nE '^\+' \
|
|
| grep -E '(^|[^[:alnum:]_])sse\.broadcast[[:space:]]*\(' \
|
|
| grep -vE '\.broadcast_for_user' \
|
|
| grep -vE '// projection-exempt: transport-only,[[:space:]]*[^[:space:]]' \
|
|
| grep -vE '//.*multi-tenant-safe: [^[:space:]]' \
|
|
| grep -vE '// safety:|:\+\+\+ ' \
|
|
| head -5 || true)
|
|
if [ -n "$MT_BROADCAST_HITS" ]; then
|
|
warn "MULTITENANT" "Unscoped \`sse.broadcast(...)\` in code reachable in multi-tenant mode. Switch to \`broadcast_for_user(&user_id, ...)\` or annotate with '// multi-tenant-safe: <reason>'. See \`dispatch_status_event\` in \`src/channels/web/mod.rs\` and the cross-tenant thread visibility incident."
|
|
echo "$MT_BROADCAST_HITS" | sed 's/^/ /'
|
|
fi
|
|
|
|
if [ "$WARNINGS" -gt 0 ]; then
|
|
echo ""
|
|
echo "Found $WARNINGS potential issue(s). Fix them or add '// safety: <reason>' to suppress."
|
|
echo "(For DISPATCH warnings, use '// dispatch-exempt: <reason>' instead.)"
|
|
echo "(For CREDNAME warnings, use '// web-identity-exempt: <reason>' instead.)"
|
|
echo "(For PROJECTION warnings, use '// projection-exempt: <category>, <detail>' instead.)"
|
|
echo "(For PUBAPI warnings, use '// pub-api-exempt: <reason>' instead.)"
|
|
echo "(For MULTITENANT warnings, use '// multi-tenant-safe: <reason>' instead.)"
|
|
echo ""
|
|
exit 1
|
|
fi
|