From 41ba34a145e1b30d253edb0e4e510b416fa24a7c Mon Sep 17 00:00:00 2001 From: houseme Date: Wed, 6 May 2026 20:22:13 +0800 Subject: [PATCH] fix(rpc): add issue 2815 regression and docker validation (#2828) --- .docker/test/issues-2815/.gitignore | 1 + .docker/test/issues-2815/README.md | 106 +++++++++++++++++ .docker/test/issues-2815/docker-compose.yml | 120 ++++++++++++++++++++ Dockerfile.source | 20 ++-- rustfs/src/storage/rpc/mod.rs | 63 ++++++++++ 5 files changed, 302 insertions(+), 8 deletions(-) create mode 100644 .docker/test/issues-2815/.gitignore create mode 100644 .docker/test/issues-2815/README.md create mode 100644 .docker/test/issues-2815/docker-compose.yml diff --git a/.docker/test/issues-2815/.gitignore b/.docker/test/issues-2815/.gitignore new file mode 100644 index 000000000..8fce60300 --- /dev/null +++ b/.docker/test/issues-2815/.gitignore @@ -0,0 +1 @@ +data/ diff --git a/.docker/test/issues-2815/README.md b/.docker/test/issues-2815/README.md new file mode 100644 index 000000000..44a014312 --- /dev/null +++ b/.docker/test/issues-2815/README.md @@ -0,0 +1,106 @@ +# Issue 2815 Local Docker Verification + +## Purpose + +This directory contains the local distributed Docker verification assets used to validate issue `#2815` against the current source build. + +The target behavior is: + +- 4-node distributed cluster starts successfully +- `/health/ready` becomes reachable on each node +- logs no longer contain `storage_info failed: Io error: wrong msgpack marker FixArray(1)` +- internode RPC authentication succeeds with an explicit non-default RPC secret + +## Files + +- `docker-compose.yml`: 4-node distributed cluster using a locally built image + +## Data Directories + +Create the bind-mount directories before `docker compose up`: + +```bash +mkdir -p .docker/test/issues-2815/data/rustfs{1..4}-disk{0..3} +``` + +## Build + +Apple Silicon / arm64 host: + +```bash +docker build --platform linux/arm64 -f Dockerfile.source -t rustfs-issue-2815-local . 
+``` + +If you intentionally want amd64 emulation: + +```bash +docker build --platform linux/amd64 -f Dockerfile.source -t rustfs-issue-2815-local . +``` + +## Run + +```bash +docker compose -f .docker/test/issues-2815/docker-compose.yml up -d +``` + +If the image was built for a platform other than `linux/arm64`, set the compose platform to match: + +```bash +RUSTFS_DOCKER_PLATFORM=linux/amd64 docker compose -f .docker/test/issues-2815/docker-compose.yml up -d +``` + +## Health Checks + +Each container defines a healthcheck that probes `/health` on its internal port `9000`: + +```bash +curl -fsS http://127.0.0.1:9000/health +``` + +Manual readiness checks from the host (ports `9101`-`9104` map to port `9000` on nodes 1-4): + +```bash +curl -i http://127.0.0.1:9101/health/ready +curl -i http://127.0.0.1:9102/health/ready +curl -i http://127.0.0.1:9103/health/ready +curl -i http://127.0.0.1:9104/health/ready +``` + +## RPC Secret Requirement + +The current source build no longer reproduces the original `FixArray(1)` decode error from issue `#2815`. + +Earlier local Docker attempts failed during erasure bootstrap with: + +```text +No valid auth token +store init failed to load formats after 10 retries: erasure read quorum +``` + +Root cause: + +- RPC authentication rejects the default secret `rustfsadmin` +- distributed local Docker validation therefore needs an explicit non-default secret + +The compose file now sets both: + +- `RUSTFS_SECRET_KEY=issue-2815-secret` +- `RUSTFS_RPC_SECRET=issue-2815-rpc-secret` + +With those values in place, the current 4-node local Docker cluster reaches a healthy state and `/health/ready` returns `200`. 
+ +In other words: + +- `RUSTFS_ACCESS_KEY` may still be `rustfsadmin` for local service credentials if desired +- `RUSTFS_SECRET_KEY` can still be used for service credentials +- but RPC authentication must not resolve to the default secret value `rustfsadmin` +- if `RUSTFS_RPC_SECRET` is unset, the code falls back to `RUSTFS_SECRET_KEY` +- so at least one of them must provide a non-default shared secret for internode RPC signing + +## Suggested Debug Commands + +```bash +docker compose -f .docker/test/issues-2815/docker-compose.yml ps +docker compose -f .docker/test/issues-2815/docker-compose.yml logs --no-color --tail=200 +docker compose -f .docker/test/issues-2815/docker-compose.yml down -v +``` diff --git a/.docker/test/issues-2815/docker-compose.yml b/.docker/test/issues-2815/docker-compose.yml new file mode 100644 index 000000000..e20c0f897 --- /dev/null +++ b/.docker/test/issues-2815/docker-compose.yml @@ -0,0 +1,120 @@ +services: + rustfs1: + image: rustfs-issue-2815-local + platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64} + hostname: rustfs1 + container_name: rustfs-issue-2815-rustfs1 + environment: + RUSTFS_ADDRESS: "0.0.0.0:9000" + RUSTFS_ACCESS_KEY: "rustfsadmin" + RUSTFS_SECRET_KEY: "issue-2815-secret" + RUSTFS_RPC_SECRET: "issue-2815-rpc-secret" + RUSTFS_CONSOLE_ENABLE: "false" + RUST_LOG: "info" + RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true" + RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}" + volumes: + - ./data/rustfs1-disk0:/data/rustfs0 + - ./data/rustfs1-disk1:/data/rustfs1 + - ./data/rustfs1-disk2:/data/rustfs2 + - ./data/rustfs1-disk3:/data/rustfs3 + networks: [rustfs-issue-2815-net] + ports: + - "9101:9000" + healthcheck: + test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"] + interval: 15s + timeout: 5s + retries: 8 + start_period: 30s + + rustfs2: + image: rustfs-issue-2815-local + platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64} + hostname: rustfs2 + container_name: rustfs-issue-2815-rustfs2 + 
environment: + RUSTFS_ADDRESS: "0.0.0.0:9000" + RUSTFS_ACCESS_KEY: "rustfsadmin" + RUSTFS_SECRET_KEY: "issue-2815-secret" + RUSTFS_RPC_SECRET: "issue-2815-rpc-secret" + RUSTFS_CONSOLE_ENABLE: "false" + RUST_LOG: "info" + RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true" + RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}" + volumes: + - ./data/rustfs2-disk0:/data/rustfs0 + - ./data/rustfs2-disk1:/data/rustfs1 + - ./data/rustfs2-disk2:/data/rustfs2 + - ./data/rustfs2-disk3:/data/rustfs3 + networks: [rustfs-issue-2815-net] + ports: + - "9102:9000" + healthcheck: + test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"] + interval: 15s + timeout: 5s + retries: 8 + start_period: 30s + + rustfs3: + image: rustfs-issue-2815-local + platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64} + hostname: rustfs3 + container_name: rustfs-issue-2815-rustfs3 + environment: + RUSTFS_ADDRESS: "0.0.0.0:9000" + RUSTFS_ACCESS_KEY: "rustfsadmin" + RUSTFS_SECRET_KEY: "issue-2815-secret" + RUSTFS_RPC_SECRET: "issue-2815-rpc-secret" + RUSTFS_CONSOLE_ENABLE: "false" + RUST_LOG: "info" + RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true" + RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}" + volumes: + - ./data/rustfs3-disk0:/data/rustfs0 + - ./data/rustfs3-disk1:/data/rustfs1 + - ./data/rustfs3-disk2:/data/rustfs2 + - ./data/rustfs3-disk3:/data/rustfs3 + networks: [rustfs-issue-2815-net] + ports: + - "9103:9000" + healthcheck: + test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"] + interval: 15s + timeout: 5s + retries: 8 + start_period: 30s + + rustfs4: + image: rustfs-issue-2815-local + platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64} + hostname: rustfs4 + container_name: rustfs-issue-2815-rustfs4 + environment: + RUSTFS_ADDRESS: "0.0.0.0:9000" + RUSTFS_ACCESS_KEY: "rustfsadmin" + RUSTFS_SECRET_KEY: "issue-2815-secret" + RUSTFS_RPC_SECRET: "issue-2815-rpc-secret" + RUSTFS_CONSOLE_ENABLE: "false" + RUST_LOG: "info" + 
RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true" + RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}" + volumes: + - ./data/rustfs4-disk0:/data/rustfs0 + - ./data/rustfs4-disk1:/data/rustfs1 + - ./data/rustfs4-disk2:/data/rustfs2 + - ./data/rustfs4-disk3:/data/rustfs3 + networks: [rustfs-issue-2815-net] + ports: + - "9104:9000" + healthcheck: + test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"] + interval: 15s + timeout: 5s + retries: 8 + start_period: 30s + +networks: + rustfs-issue-2815-net: + name: rustfs-issue-2815-net diff --git a/Dockerfile.source b/Dockerfile.source index 4b61d5a50..6e3af9ded 100644 --- a/Dockerfile.source +++ b/Dockerfile.source @@ -26,6 +26,7 @@ ARG TARGETPLATFORM ARG BUILDPLATFORM +ARG TARGETARCH # ----------------------------- # Build stage @@ -35,6 +36,7 @@ FROM rust:1.93-trixie AS builder # Re-declare args after FROM ARG TARGETPLATFORM ARG BUILDPLATFORM +ARG TARGETARCH # Debug: print platforms RUN echo "Build info -> BUILDPLATFORM=${BUILDPLATFORM}, TARGETPLATFORM=${TARGETPLATFORM}" @@ -87,6 +89,7 @@ ENV CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++ ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-linux-gnu-gcc ENV CC_x86_64_unknown_linux_gnu=x86_64-linux-gnu-gcc ENV CXX_x86_64_unknown_linux_gnu=x86_64-linux-gnu-g++ +ENV CARGO_TARGET_DIR=/usr/src/rustfs/target/docker-build WORKDIR /usr/src/rustfs @@ -120,33 +123,33 @@ ENV CARGO_NET_GIT_FETCH_WITH_CLI=true \ # Generate protobuf/flatbuffers code (uses protoc/flatc from distro) RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/git \ - --mount=type=cache,target=/usr/src/rustfs/target \ + --mount=type=cache,target=/usr/src/rustfs/target/docker-build \ cargo run --bin gproto # Build RustFS (target depends on TARGETPLATFORM) RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/git \ - --mount=type=cache,target=/usr/src/rustfs/target \ + 
--mount=type=cache,target=/usr/src/rustfs/target/docker-build \ set -eux; \ rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu; \ target_platform="${TARGETPLATFORM:-}"; \ if [ -z "${target_platform}" ]; then \ - case "$(uname -m)" in \ - x86_64) target_platform="linux/amd64" ;; \ - aarch64|arm64) target_platform="linux/arm64" ;; \ - *) target_platform="linux/amd64" ;; \ + case "${TARGETARCH:-$(uname -m)}" in \ + amd64|x86_64) target_platform="linux/amd64" ;; \ + arm64|aarch64) target_platform="linux/arm64" ;; \ + *) echo "Unsupported target architecture: ${TARGETARCH:-$(uname -m)}" >&2; exit 1 ;; \ esac; \ fi; \ case "${target_platform}" in \ linux/amd64) \ echo "Building for x86_64-unknown-linux-gnu"; \ cargo build --release --locked --target x86_64-unknown-linux-gnu --bin rustfs -j "$(nproc)"; \ - install -m 0755 target/x86_64-unknown-linux-gnu/release/rustfs /usr/local/bin/rustfs \ + install -m 0755 "${CARGO_TARGET_DIR}/x86_64-unknown-linux-gnu/release/rustfs" /usr/local/bin/rustfs \ ;; \ linux/arm64) \ echo "Building for aarch64-unknown-linux-gnu"; \ cargo build --release --locked --target aarch64-unknown-linux-gnu --bin rustfs -j "$(nproc)"; \ - install -m 0755 target/aarch64-unknown-linux-gnu/release/rustfs /usr/local/bin/rustfs \ + install -m 0755 "${CARGO_TARGET_DIR}/aarch64-unknown-linux-gnu/release/rustfs" /usr/local/bin/rustfs \ ;; \ *) \ echo "Unsupported target platform=${target_platform}" >&2; exit 1 \ @@ -212,6 +215,7 @@ RUN set -eux; \ apt-get update; \ apt-get install -y --no-install-recommends \ ca-certificates \ + curl \ tzdata \ coreutils; \ rm -rf /var/lib/apt/lists/* diff --git a/rustfs/src/storage/rpc/mod.rs b/rustfs/src/storage/rpc/mod.rs index bec1a4e23..86c5f8f90 100644 --- a/rustfs/src/storage/rpc/mod.rs +++ b/rustfs/src/storage/rpc/mod.rs @@ -36,6 +36,7 @@ pub(crate) fn encode_msgpack_map(value: &T) -> Result, rmp mod tests { use super::*; use rmp_serde::Deserializer; + use rustfs_madmin::{BackendDisks, BackendInfo, 
Disk, ITEM_ONLINE, StorageInfo}; use serde::Deserialize; use std::collections::HashMap; use std::io::Cursor; @@ -90,4 +91,66 @@ mod tests { let decoded: Nested = Deserialize::deserialize(&mut Deserializer::new(Cursor::new(&buf))).unwrap(); assert_eq!(val, decoded); } + + #[test] + fn storage_info_map_encoding_round_trip_matches_issue_2815_contract() { + let mut online_disks = BackendDisks::new(); + online_disks.0.insert("node1".into(), 4); + let mut offline_disks = BackendDisks::new(); + offline_disks.0.insert("node2".into(), 0); + + let value = StorageInfo { + disks: vec![Disk { + endpoint: "node1:9000".into(), + state: ITEM_ONLINE.into(), + local: true, + pool_index: 0, + set_index: 0, + disk_index: 0, + ..Default::default() + }], + backend: BackendInfo { + online_disks, + offline_disks, + total_sets: vec![1], + drives_per_set: vec![4], + ..Default::default() + }, + }; + + let buf = encode_msgpack_map(&value).unwrap(); + let marker = buf[0]; + assert!( + (0x80..=0x8f).contains(&marker) || marker == 0xde || marker == 0xdf, + "StorageInfo map-encoded payload must start with a map marker, got 0x{marker:02x}" + ); + let decoded: StorageInfo = Deserialize::deserialize(&mut Deserializer::new(Cursor::new(&buf))).unwrap(); + + assert_eq!(decoded.disks.len(), 1); + assert_eq!(decoded.disks[0].endpoint, "node1:9000"); + assert_eq!(decoded.backend.online_disks.0.get("node1"), Some(&4)); + assert_eq!(decoded.backend.offline_disks.0.get("node2"), Some(&0)); + } + + #[test] + fn storage_info_tuple_encoding_uses_array_marker_that_issue_2815_fixed() { + let mut online_disks = BackendDisks::new(); + online_disks.0.insert("node1".into(), 4); + + let value = StorageInfo { + backend: BackendInfo { + online_disks, + ..Default::default() + }, + ..Default::default() + }; + + let mut buf = Vec::new(); + value.serialize(&mut Serializer::new(&mut buf)).unwrap(); + let marker = buf[0]; + assert!( + (0x90..=0x9f).contains(&marker) || marker == 0xdc || marker == 0xdd, + "legacy tuple-mode 
StorageInfo must start with an array marker, got 0x{marker:02x}" + ); + } }