Files
rustfs/scripts/run_issue_2941_perf_capture.sh
houseme 6e12289339 fix(runtime): finalize issue 2941 profiling cleanup (#2983)
* perf(runtime): narrow profiling support and upgrade starshard

* style(notify): normalize starshard imports

* perf(ecstore): reduce list_path_raw coordination overhead

* docs(scripts): add issue 2941 perf capture workflow

* fix(runtime): finalize issue 2941 profiling cleanup

* build(deps): bump quick-xml to 0.40.0

* chore(scripts): untrack local perf capture guide

* fix(scripts): honor label in perf capture output
2026-05-16 11:09:04 +00:00

291 lines
8.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# Collects host, process, container and perf artifacts for a running rustfs
# instance. Each setting below can be preset through the environment; unset or
# empty values fall back to the defaults shown here.
set -o errexit -o nounset -o pipefail

# Parent directory of the directory that contains this script.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

: "${LABEL:=issue-2941}"
: "${DURATION_SECS:=60}"
: "${PERF_FREQ:=99}"
: "${OUT_DIR:=}"
: "${RUSTFS_PID:=}"
: "${CONTAINER_NAME:=}"
: "${ENDPOINT:=http://127.0.0.1:9000}"
: "${PERF_MODE:=auto}" # auto|on|off
: "${SUDO_CMD:=}"      # example: sudo
# Print the command-line help (options, environment variables, examples) to
# stdout. The heredoc delimiter is quoted, so the text is emitted literally
# with no variable or command expansion.
usage() {
cat <<'USAGE'
Usage:
scripts/run_issue_2941_perf_capture.sh [options]
Options:
--label <name> artifact label prefix
--duration <secs> sample duration in seconds (default: 60)
--out-dir <dir> artifact output directory
--pid <pid> rustfs pid; auto-detect if omitted
--container <name> docker container name/id for extra stats
--endpoint <url> rustfs endpoint for health probes (default: http://127.0.0.1:9000)
--perf <auto|on|off> whether to run perf record (default: auto)
--perf-freq <hz> perf sample frequency (default: 99)
--sudo-cmd <cmd> optional prefix for privileged perf, e.g. "sudo"
-h, --help show help
Environment:
LABEL
DURATION_SECS
PERF_FREQ
OUT_DIR
RUSTFS_PID
CONTAINER_NAME
ENDPOINT
PERF_MODE
SUDO_CMD
Examples:
scripts/run_issue_2941_perf_capture.sh --label musl-baseline --container rustfs
scripts/run_issue_2941_perf_capture.sh --label glibc-test --pid 12345 --perf on --sudo-cmd sudo
USAGE
}
# Write an informational line to stdout, prefixed with "[INFO] ".
# All arguments are joined into one message.
log() {
  local message="$*"
  printf '[INFO] %s\n' "${message}"
}
# Write a warning line to stderr, prefixed with "[WARN] ".
# All arguments are joined into one message.
warn() {
  local message="$*"
  printf '[WARN] %s\n' "${message}" >&2
}
# Ensure an option was followed by a usable value.
# Arguments: $1 - option name (used in the warning)
#            $2 - candidate value (may be absent)
# A value that is missing, empty, or starts with "--" is rejected: a warning
# and the usage text are printed and the script exits with status 1.
require_arg() {
  local flag="$1"
  local candidate="${2-}"
  if (( $# >= 2 )) && [[ -n "${candidate}" && "${candidate}" != --* ]]; then
    return 0
  fi
  warn "missing value for ${flag}"
  usage
  exit 1
}
# Parse command-line options into the global configuration variables.
# Every value-taking option is validated with require_arg before assignment.
# -h/--help prints usage and exits 0; an unknown argument prints a warning and
# the usage text, then exits 1.
parse_args() {
  local opt
  while (( $# > 0 )); do
    opt="$1"
    case "${opt}" in
      -h|--help)
        usage
        exit 0
        ;;
      --label|--duration|--out-dir|--pid|--container|--endpoint|--perf|--perf-freq|--sudo-cmd)
        require_arg "${opt}" "${2-}"
        case "${opt}" in
          --label) LABEL="$2" ;;
          --duration) DURATION_SECS="$2" ;;
          --out-dir) OUT_DIR="$2" ;;
          --pid) RUSTFS_PID="$2" ;;
          --container) CONTAINER_NAME="$2" ;;
          --endpoint) ENDPOINT="$2" ;;
          --perf) PERF_MODE="$2" ;;
          --perf-freq) PERF_FREQ="$2" ;;
          --sudo-cmd) SUDO_CMD="$2" ;;
        esac
        shift 2
        ;;
      *)
        warn "unknown argument: ${opt}"
        usage
        exit 1
        ;;
    esac
  done
}
# Fill in settings that depend on other settings once argument parsing is done.
# Globals: OUT_DIR (read/written), PROJECT_ROOT, LABEL (read)
# When OUT_DIR is empty it becomes
# "<PROJECT_ROOT>/target/perf/<LABEL>-<YYYYmmdd-HHMMSS>"; an explicit OUT_DIR
# is left untouched.
finalize_defaults() {
  if [[ -z "${OUT_DIR}" ]]; then
    # The previous version had a stray "}" after the date substitution, which
    # ended up as a literal character at the end of the directory name.
    OUT_DIR="${PROJECT_ROOT}/target/perf/${LABEL}-$(date +%Y%m%d-%H%M%S)"
  fi
}
# Succeed when "$1" resolves to something the shell can run, as reported by
# `command -v`. Produces no output.
command_exists() {
  local tool="$1"
  command -v "${tool}" &>/dev/null
}
# Run a command and store its combined stdout/stderr in a file.
# Arguments: $1 - output file; remaining arguments - the command to run
# Returns:   0 when the command succeeds; otherwise emits a warning that
#            points at the output file and returns 1.
write_cmd_output() {
  local destination="$1"
  shift
  "$@" >"${destination}" 2>&1 && return 0
  warn "command failed, see ${destination}"
  return 1
}
# Print the pid of the rustfs process to inspect, trying in order:
#   1. RUSTFS_PID when it is non-empty;
#   2. the State.Pid reported by `docker inspect` for CONTAINER_NAME, when a
#      container is configured, docker is available, and the pid is not "0";
#   3. the newest process matched by `pgrep -n rustfs`.
# Prints nothing when every lookup comes up empty.
resolve_pid() {
  local candidate="${RUSTFS_PID}"

  if [[ -z "${candidate}" && -n "${CONTAINER_NAME}" ]] && command_exists docker; then
    candidate="$(docker inspect --format '{{.State.Pid}}' "${CONTAINER_NAME}" 2>/dev/null || true)"
    # "0" is treated as "no pid" and falls through to pgrep.
    if [[ "${candidate}" == "0" ]]; then
      candidate=""
    fi
  fi

  if [[ -n "${candidate}" ]]; then
    printf '%s\n' "${candidate}"
    return
  fi

  pgrep -n rustfs || true
}
# Save a point-in-time snapshot of a process into OUT_DIR.
# Arguments: $1 - pid (nothing is done when empty)
#            $2 - file-name prefix for the artifacts (e.g. "start", "end")
# Copies the readable /proc/<pid> files status, io, sched, smaps_rollup and
# limits, then records `ps` process/thread listings and one `top` sample when
# those tools are available. Every step is best-effort.
snapshot_proc() {
  local pid="$1"
  local prefix="$2"
  if [[ -z "${pid}" ]]; then
    return 0
  fi

  local proc_dir="/proc/${pid}"
  local stem="${OUT_DIR}/${prefix}"
  local entry
  for entry in status io sched smaps_rollup limits; do
    if [[ -r "${proc_dir}/${entry}" ]]; then
      # Artifact names use dashes: smaps_rollup -> proc-smaps-rollup.
      cp "${proc_dir}/${entry}" "${stem}.proc-${entry//_/-}.txt" || true
    fi
  done

  if command_exists ps; then
    ps -p "${pid}" -o pid,ppid,stat,pcpu,pmem,rss,vsz,etime,args \
      >"${stem}.ps.txt" 2>&1 || true
    ps -L -p "${pid}" -o pid,tid,psr,pcpu,stat,wchan:32,comm \
      >"${stem}.threads.txt" 2>&1 || true
  fi

  if command_exists top; then
    # The non-Linux branch uses a different top flag syntax.
    case "$(uname -s)" in
      Linux)
        top -H -b -n 1 -p "${pid}" >"${stem}.top.txt" 2>&1 || true
        ;;
      *)
        top -l 1 -pid "${pid}" >"${stem}.top.txt" 2>&1 || true
        ;;
    esac
  fi
}
# Record basic host facts into OUT_DIR: `uname -a` always, plus `lscpu`,
# `free -h`, `df -h` and `mount` for whichever of those tools exist. Each
# output lands in "<tool>.txt"; failures are ignored.
capture_host_info() {
  write_cmd_output "${OUT_DIR}/uname.txt" uname -a || true

  local spec
  local -a argv
  for spec in "lscpu" "free -h" "df -h" "mount"; do
    # Split "tool args" into words; the first word also names the output file.
    IFS=' ' read -r -a argv <<<"${spec}"
    if command_exists "${argv[0]}"; then
      write_cmd_output "${OUT_DIR}/${argv[0]}.txt" "${argv[@]}" || true
    fi
  done
}
# Probe the rustfs health endpoints with curl, when curl is installed.
# "${ENDPOINT}/health" is saved to health.txt and "${ENDPOINT}/health/ready"
# to health-ready.txt (stdout and stderr combined); probe failures are ignored.
capture_endpoint_info() {
  command_exists curl || return 0

  local probe
  for probe in health health/ready; do
    # The output file is the probe path with "/" replaced by "-".
    curl -fsS "${ENDPOINT}/${probe}" >"${OUT_DIR}/${probe//\//-}.txt" 2>&1 || true
  done
}
# Capture one-off docker details for CONTAINER_NAME into OUT_DIR: the full
# `docker inspect` output, the last 500 log lines, and a single stats sample
# in JSON form. Does nothing when no container is configured or docker is
# missing; individual docker failures are ignored.
capture_container_info() {
  if [[ -z "${CONTAINER_NAME}" ]] || ! command_exists docker; then
    return 0
  fi

  local target="${CONTAINER_NAME}"
  docker inspect "${target}" \
    >"${OUT_DIR}/docker-inspect.json" 2>&1 || true
  docker logs --tail 500 "${target}" \
    >"${OUT_DIR}/docker-logs-tail.txt" 2>&1 || true
  docker stats --no-stream --format '{{json .}}' "${target}" \
    >"${OUT_DIR}/docker-stats-once.jsonl" 2>&1 || true
}
# Append one `docker stats` JSON sample for CONTAINER_NAME to
# "${OUT_DIR}/docker-stats-loop.jsonl" repeatedly, sleeping one second between
# samples, until DURATION_SECS seconds have elapsed. The file is truncated
# first. Does nothing when no container is configured or docker is missing.
sample_container_stats_loop() {
  if [[ -z "${CONTAINER_NAME}" ]] || ! command_exists docker; then
    return 0
  fi

  local sink="${OUT_DIR}/docker-stats-loop.jsonl"
  : >"${sink}"

  # SECONDS is bash's running count of seconds since the shell started.
  local deadline=$((SECONDS + DURATION_SECS))
  until (( SECONDS >= deadline )); do
    docker stats --no-stream --format '{{json .}}' "${CONTAINER_NAME}" \
      >>"${sink}" 2>/dev/null || true
    sleep 1
  done
}
# Sample the process with pidstat once per second for DURATION_SECS samples,
# writing to "${OUT_DIR}/pidstat.txt".
# Arguments: $1 - pid (nothing is done when empty)
# When pidstat is not installed the file holds "pidstat unavailable" instead.
sample_pidstat() {
  local pid="$1"
  if [[ -z "${pid}" ]]; then
    return 0
  fi

  local report="${OUT_DIR}/pidstat.txt"
  if ! command_exists pidstat; then
    printf '%s\n' "pidstat unavailable" >"${report}"
    return 0
  fi

  pidstat -durwh -p "${pid}" 1 "${DURATION_SECS}" >"${report}" 2>&1 || true
}
# Record a perf profile of the process for DURATION_SECS seconds and render a
# text report.
# Globals:   PERF_MODE, PERF_FREQ, DURATION_SECS, SUDO_CMD, OUT_DIR (read)
# Arguments: $1 - pid (nothing is done when empty)
# Outputs:   perf.data, perf-record.log and, on success, perf-report.txt in
#            OUT_DIR
# Returns:   always 0; failures are reported via warn only when PERF_MODE is
#            "on".
sample_perf() {
  local pid="$1"
  [[ -n "${pid}" ]] || return 0
  if [[ "${PERF_MODE}" == "off" ]]; then
    return 0
  fi

  local perf_data="${OUT_DIR}/perf.data"
  local perf_log="${OUT_DIR}/perf-record.log"
  local perf_report="${OUT_DIR}/perf-report.txt"

  if ! command_exists perf; then
    echo "perf unavailable" >"${perf_log}"
    if [[ "${PERF_MODE}" == "on" ]]; then
      warn "perf requested but not installed"
    fi
    return 0
  fi

  # Build "<optional sudo words> perf" as one array. Expanding a separate,
  # possibly empty prefix array under `set -u` aborts with "unbound variable"
  # on bash older than 4.4; this array always has at least one element.
  local -a perf_cmd=()
  if [[ -n "${SUDO_CMD}" ]]; then
    # Word-split so a value such as "sudo -n" becomes separate arguments.
    read -r -a perf_cmd <<<"${SUDO_CMD}"
  fi
  perf_cmd+=(perf)

  # `sleep` is the command perf waits on, so it bounds the capture window.
  if "${perf_cmd[@]}" record -F "${PERF_FREQ}" -g -p "${pid}" -o "${perf_data}" \
    -- sleep "${DURATION_SECS}" >"${perf_log}" 2>&1; then
    "${perf_cmd[@]}" report --stdio -i "${perf_data}" >"${perf_report}" 2>&1 || true
  elif [[ "${PERF_MODE}" == "on" ]]; then
    warn "perf record failed; see ${perf_log}"
  fi
}
# Record which binary the process is running and that binary's --help output.
# Arguments: $1 - pid
# Writes binary-path.txt (the target of /proc/<pid>/exe) and binary-help.txt
# into OUT_DIR. Does nothing when the pid is empty or /proc/<pid>/exe is not
# executable by the current user; failures are ignored.
capture_version_info() {
  local pid="$1"
  local exe_link="/proc/${pid}/exe"
  if [[ -z "${pid}" || ! -x "${exe_link}" ]]; then
    return 0
  fi

  readlink "${exe_link}" >"${OUT_DIR}/binary-path.txt" 2>&1 || true
  # Runs the executable through the /proc link with --help.
  "${exe_link}" --help >"${OUT_DIR}/binary-help.txt" 2>&1 || true
}
# Drive one capture run: parse options, create the output directory, resolve
# the target pid, write capture-meta.txt, take the one-off captures and the
# "start" snapshot, run the three samplers in parallel, then take the "end"
# snapshot and list the produced artifacts.
main() {
  parse_args "$@"
  finalize_defaults
  mkdir -p "${OUT_DIR}"

  local pid
  pid="$(resolve_pid)"
  if [[ -n "${pid}" ]]; then
    log "using rustfs pid=${pid}"
  else
    warn "failed to detect rustfs pid automatically"
  fi

  # key=value record of this run; the git fields are empty outside a checkout.
  printf '%s\n' \
    "label=${LABEL}" \
    "duration_secs=${DURATION_SECS}" \
    "perf_freq=${PERF_FREQ}" \
    "endpoint=${ENDPOINT}" \
    "container_name=${CONTAINER_NAME}" \
    "rustfs_pid=${pid}" \
    "perf_mode=${PERF_MODE}" \
    "sudo_cmd=${SUDO_CMD}" \
    "started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    "project_root=${PROJECT_ROOT}" \
    "git_branch=$(git -C "${PROJECT_ROOT}" branch --show-current 2>/dev/null || true)" \
    "git_head=$(git -C "${PROJECT_ROOT}" rev-parse HEAD 2>/dev/null || true)" \
    >"${OUT_DIR}/capture-meta.txt"

  capture_host_info
  capture_endpoint_info
  capture_container_info
  capture_version_info "${pid}"
  snapshot_proc "${pid}" "start"

  # The samplers run concurrently so they cover the same time window.
  local -a workers=()
  sample_pidstat "${pid}" &
  workers+=("$!")
  sample_container_stats_loop &
  workers+=("$!")
  sample_perf "${pid}" &
  workers+=("$!")
  # Wait for every sampler; a failing sampler must not abort the run.
  wait "${workers[@]}" || true

  snapshot_proc "${pid}" "end"
  capture_endpoint_info

  log "issue-2941 perf capture artifacts written to ${OUT_DIR}"
  find "${OUT_DIR}" -maxdepth 1 -type f | sort
}
# Script entry point: forward all command-line arguments to main unchanged.
main "$@"