fix(rpc): add issue 2815 regression and docker validation (#2828)

This commit is contained in:
houseme
2026-05-06 20:22:13 +08:00
committed by GitHub
parent 3898d524fe
commit 41ba34a145
5 changed files with 302 additions and 8 deletions

1
.docker/test/issues-2815/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
data/

View File

@@ -0,0 +1,106 @@
# Issue 2815 Local Docker Verification
## Purpose
This directory contains the local distributed Docker verification assets used to validate issue `#2815` against the current source build.
The target behavior is:
- 4-node distributed cluster starts successfully
- `/health/ready` becomes reachable on each node
- logs no longer contain `storage_info failed: Io error: wrong msgpack marker FixArray(1)`
- internode RPC authentication succeeds with an explicit non-default RPC secret
## Files
- `docker-compose.yml`: 4-node distributed cluster using a locally built image
## Data Directories
Create the bind-mount directories before `docker compose up`:
```bash
mkdir -p .docker/test/issues-2815/data/rustfs{1..4}-disk{0..3}
```
## Build
Apple Silicon / arm64 host:
```bash
docker build --platform linux/arm64 -f Dockerfile.source -t rustfs-issue-2815-local .
```
If you intentionally want amd64 emulation:
```bash
docker build --platform linux/amd64 -f Dockerfile.source -t rustfs-issue-2815-local .
```
## Run
```bash
docker compose -f .docker/test/issues-2815/docker-compose.yml up -d
```
If the image platform is not `linux/arm64`, align compose explicitly:
```bash
RUSTFS_DOCKER_PLATFORM=linux/amd64 docker compose -f .docker/test/issues-2815/docker-compose.yml up -d
```
## Health Checks
Container-level healthcheck is now included and probes:
```bash
curl -fsS http://127.0.0.1:9000/health
```
Manual checks:
```bash
curl -i http://127.0.0.1:9101/health/ready
curl -i http://127.0.0.1:9102/health/ready
curl -i http://127.0.0.1:9103/health/ready
curl -i http://127.0.0.1:9104/health/ready
```
## RPC Secret Requirement
The current source build no longer reproduces the original `FixArray(1)` decode error from issue `#2815`.
Earlier local Docker attempts failed during erasure bootstrap with:
```text
No valid auth token
store init failed to load formats after 10 retries: erasure read quorum
```
Root cause:
- RPC authentication rejects the default secret `rustfsadmin`
- distributed local Docker validation therefore needs an explicit non-default secret
This compose now sets both:
- `RUSTFS_SECRET_KEY=issue-2815-secret`
- `RUSTFS_RPC_SECRET=issue-2815-rpc-secret`
With those values in place, the current 4-node local Docker cluster reaches healthy state and `/health/ready` returns `200`.
In other words:
- `RUSTFS_ACCESS_KEY` may still be `rustfsadmin` for local service credentials if desired
- `RUSTFS_SECRET_KEY` can still be used for service credentials
- but RPC authentication must not resolve to the default secret value `rustfsadmin`
- if `RUSTFS_RPC_SECRET` is unset, the code falls back to `RUSTFS_SECRET_KEY`
- so at least one of them must provide a non-default shared secret for internode RPC signing
## Suggested Debug Commands
```bash
docker compose -f .docker/test/issues-2815/docker-compose.yml ps
docker compose -f .docker/test/issues-2815/docker-compose.yml logs --no-color --tail=200
docker compose -f .docker/test/issues-2815/docker-compose.yml down -v
```

View File

@@ -0,0 +1,120 @@
services:
rustfs1:
image: rustfs-issue-2815-local
platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64}
hostname: rustfs1
container_name: rustfs-issue-2815-rustfs1
environment:
RUSTFS_ADDRESS: "0.0.0.0:9000"
RUSTFS_ACCESS_KEY: "rustfsadmin"
RUSTFS_SECRET_KEY: "issue-2815-secret"
RUSTFS_RPC_SECRET: "issue-2815-rpc-secret"
RUSTFS_CONSOLE_ENABLE: "false"
RUST_LOG: "info"
RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true"
RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}"
volumes:
- ./data/rustfs1-disk0:/data/rustfs0
- ./data/rustfs1-disk1:/data/rustfs1
- ./data/rustfs1-disk2:/data/rustfs2
- ./data/rustfs1-disk3:/data/rustfs3
networks: [rustfs-issue-2815-net]
ports:
- "9101:9000"
healthcheck:
test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 30s
rustfs2:
image: rustfs-issue-2815-local
platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64}
hostname: rustfs2
container_name: rustfs-issue-2815-rustfs2
environment:
RUSTFS_ADDRESS: "0.0.0.0:9000"
RUSTFS_ACCESS_KEY: "rustfsadmin"
RUSTFS_SECRET_KEY: "issue-2815-secret"
RUSTFS_RPC_SECRET: "issue-2815-rpc-secret"
RUSTFS_CONSOLE_ENABLE: "false"
RUST_LOG: "info"
RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true"
RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}"
volumes:
- ./data/rustfs2-disk0:/data/rustfs0
- ./data/rustfs2-disk1:/data/rustfs1
- ./data/rustfs2-disk2:/data/rustfs2
- ./data/rustfs2-disk3:/data/rustfs3
networks: [rustfs-issue-2815-net]
ports:
- "9102:9000"
healthcheck:
test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 30s
rustfs3:
image: rustfs-issue-2815-local
platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64}
hostname: rustfs3
container_name: rustfs-issue-2815-rustfs3
environment:
RUSTFS_ADDRESS: "0.0.0.0:9000"
RUSTFS_ACCESS_KEY: "rustfsadmin"
RUSTFS_SECRET_KEY: "issue-2815-secret"
RUSTFS_RPC_SECRET: "issue-2815-rpc-secret"
RUSTFS_CONSOLE_ENABLE: "false"
RUST_LOG: "info"
RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true"
RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}"
volumes:
- ./data/rustfs3-disk0:/data/rustfs0
- ./data/rustfs3-disk1:/data/rustfs1
- ./data/rustfs3-disk2:/data/rustfs2
- ./data/rustfs3-disk3:/data/rustfs3
networks: [rustfs-issue-2815-net]
ports:
- "9103:9000"
healthcheck:
test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 30s
rustfs4:
image: rustfs-issue-2815-local
platform: ${RUSTFS_DOCKER_PLATFORM:-linux/arm64}
hostname: rustfs4
container_name: rustfs-issue-2815-rustfs4
environment:
RUSTFS_ADDRESS: "0.0.0.0:9000"
RUSTFS_ACCESS_KEY: "rustfsadmin"
RUSTFS_SECRET_KEY: "issue-2815-secret"
RUSTFS_RPC_SECRET: "issue-2815-rpc-secret"
RUSTFS_CONSOLE_ENABLE: "false"
RUST_LOG: "info"
RUSTFS_UNSAFE_BYPASS_DISK_CHECK: "true"
RUSTFS_VOLUMES: "http://rustfs{1...4}:9000/data/rustfs{0...3}"
volumes:
- ./data/rustfs4-disk0:/data/rustfs0
- ./data/rustfs4-disk1:/data/rustfs1
- ./data/rustfs4-disk2:/data/rustfs2
- ./data/rustfs4-disk3:/data/rustfs3
networks: [rustfs-issue-2815-net]
ports:
- "9104:9000"
healthcheck:
test: ["CMD", "sh", "-c", "curl -fsS http://127.0.0.1:9000/health || exit 1"]
interval: 15s
timeout: 5s
retries: 8
start_period: 30s
networks:
rustfs-issue-2815-net:
name: rustfs-issue-2815-net

View File

@@ -26,6 +26,7 @@
ARG TARGETPLATFORM
ARG BUILDPLATFORM
ARG TARGETARCH
# -----------------------------
# Build stage
@@ -35,6 +36,7 @@ FROM rust:1.93-trixie AS builder
# Re-declare args after FROM
ARG TARGETPLATFORM
ARG BUILDPLATFORM
ARG TARGETARCH
# Debug: print platforms
RUN echo "Build info -> BUILDPLATFORM=${BUILDPLATFORM}, TARGETPLATFORM=${TARGETPLATFORM}"
@@ -87,6 +89,7 @@ ENV CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-linux-gnu-gcc
ENV CC_x86_64_unknown_linux_gnu=x86_64-linux-gnu-gcc
ENV CXX_x86_64_unknown_linux_gnu=x86_64-linux-gnu-g++
ENV CARGO_TARGET_DIR=/usr/src/rustfs/target/docker-build
WORKDIR /usr/src/rustfs
@@ -120,33 +123,33 @@ ENV CARGO_NET_GIT_FETCH_WITH_CLI=true \
# Generate protobuf/flatbuffers code (uses protoc/flatc from distro)
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/usr/src/rustfs/target \
--mount=type=cache,target=/usr/src/rustfs/target/docker-build \
cargo run --bin gproto
# Build RustFS (target depends on TARGETPLATFORM)
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/usr/src/rustfs/target \
--mount=type=cache,target=/usr/src/rustfs/target/docker-build \
set -eux; \
rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu; \
target_platform="${TARGETPLATFORM:-}"; \
if [ -z "${target_platform}" ]; then \
case "$(uname -m)" in \
x86_64) target_platform="linux/amd64" ;; \
aarch64|arm64) target_platform="linux/arm64" ;; \
*) target_platform="linux/amd64" ;; \
case "${TARGETARCH:-$(uname -m)}" in \
amd64|x86_64) target_platform="linux/amd64" ;; \
arm64|aarch64) target_platform="linux/arm64" ;; \
*) echo "Unsupported target architecture: ${TARGETARCH:-$(uname -m)}" >&2; exit 1 ;; \
esac; \
fi; \
case "${target_platform}" in \
linux/amd64) \
echo "Building for x86_64-unknown-linux-gnu"; \
cargo build --release --locked --target x86_64-unknown-linux-gnu --bin rustfs -j "$(nproc)"; \
install -m 0755 target/x86_64-unknown-linux-gnu/release/rustfs /usr/local/bin/rustfs \
install -m 0755 "${CARGO_TARGET_DIR}/x86_64-unknown-linux-gnu/release/rustfs" /usr/local/bin/rustfs \
;; \
linux/arm64) \
echo "Building for aarch64-unknown-linux-gnu"; \
cargo build --release --locked --target aarch64-unknown-linux-gnu --bin rustfs -j "$(nproc)"; \
install -m 0755 target/aarch64-unknown-linux-gnu/release/rustfs /usr/local/bin/rustfs \
install -m 0755 "${CARGO_TARGET_DIR}/aarch64-unknown-linux-gnu/release/rustfs" /usr/local/bin/rustfs \
;; \
*) \
echo "Unsupported target platform=${target_platform}" >&2; exit 1 \
@@ -212,6 +215,7 @@ RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
ca-certificates \
curl \
tzdata \
coreutils; \
rm -rf /var/lib/apt/lists/*

View File

@@ -36,6 +36,7 @@ pub(crate) fn encode_msgpack_map<T: Serialize>(value: &T) -> Result<Vec<u8>, rmp
mod tests {
use super::*;
use rmp_serde::Deserializer;
use rustfs_madmin::{BackendDisks, BackendInfo, Disk, ITEM_ONLINE, StorageInfo};
use serde::Deserialize;
use std::collections::HashMap;
use std::io::Cursor;
@@ -90,4 +91,66 @@ mod tests {
let decoded: Nested = Deserialize::deserialize(&mut Deserializer::new(Cursor::new(&buf))).unwrap();
assert_eq!(val, decoded);
}
#[test]
fn storage_info_map_encoding_round_trip_matches_issue_2815_contract() {
let mut online_disks = BackendDisks::new();
online_disks.0.insert("node1".into(), 4);
let mut offline_disks = BackendDisks::new();
offline_disks.0.insert("node2".into(), 0);
let value = StorageInfo {
disks: vec![Disk {
endpoint: "node1:9000".into(),
state: ITEM_ONLINE.into(),
local: true,
pool_index: 0,
set_index: 0,
disk_index: 0,
..Default::default()
}],
backend: BackendInfo {
online_disks,
offline_disks,
total_sets: vec![1],
drives_per_set: vec![4],
..Default::default()
},
};
let buf = encode_msgpack_map(&value).unwrap();
let marker = buf[0];
assert!(
(0x80..=0x8f).contains(&marker) || marker == 0xde || marker == 0xdf,
"StorageInfo map-encoded payload must start with a map marker, got 0x{marker:02x}"
);
let decoded: StorageInfo = Deserialize::deserialize(&mut Deserializer::new(Cursor::new(&buf))).unwrap();
assert_eq!(decoded.disks.len(), 1);
assert_eq!(decoded.disks[0].endpoint, "node1:9000");
assert_eq!(decoded.backend.online_disks.0.get("node1"), Some(&4));
assert_eq!(decoded.backend.offline_disks.0.get("node2"), Some(&0));
}
#[test]
fn storage_info_tuple_encoding_uses_array_marker_that_issue_2815_fixed() {
let mut online_disks = BackendDisks::new();
online_disks.0.insert("node1".into(), 4);
let value = StorageInfo {
backend: BackendInfo {
online_disks,
..Default::default()
},
..Default::default()
};
let mut buf = Vec::new();
value.serialize(&mut Serializer::new(&mut buf)).unwrap();
let marker = buf[0];
assert!(
(0x90..=0x9f).contains(&marker) || marker == 0xdc || marker == 0xdd,
"legacy tuple-mode StorageInfo must start with an array marker, got 0x{marker:02x}"
);
}
}