mirror of
https://github.com/rustfs/rustfs.git
synced 2026-05-06 22:28:16 +08:00
feat(obs): integrate dial9-tokio-telemetry for runtime tracing (#2285)
Co-authored-by: heihutu <heihutu@gmail.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: houseme <4829346+houseme@users.noreply.github.com>
This commit is contained in:
26
.cargo/config.toml
Normal file
26
.cargo/config.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Copyright 2024 RustFS Team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# RustFS Cargo configuration
|
||||
|
||||
# Enable tokio_unstable cfg for dial9-tokio-telemetry support
|
||||
# This allows dial9 to hook into Tokio's internal runtime events
|
||||
[build]
|
||||
# Enable Tokio unstable features required by dial9-tokio-telemetry for runtime tracing.
|
||||
# See: https://docs.rs/tokio/latest/tokio/#unstable-features
|
||||
rustflags = ["--cfg", "tokio_unstable"]
|
||||
|
||||
# Enable frame pointers for CPU profiling (Linux only, optional but recommended)
|
||||
# Uncomment the following line for better CPU profiling data
|
||||
# rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes"]
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -20,7 +20,8 @@ deploy/data/*
|
||||
*jsonl
|
||||
.env
|
||||
.rustfs.sys
|
||||
.cargo
|
||||
.cargo/
|
||||
!.cargo/config.toml
|
||||
profile.json
|
||||
.docker/openobserve-otel/data
|
||||
*.zst
|
||||
|
||||
306
Cargo.lock
generated
306
Cargo.lock
generated
@@ -2,6 +2,16 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "Inflector"
|
||||
version = "0.11.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.25.1"
|
||||
@@ -1247,6 +1257,31 @@ dependencies = [
|
||||
"hybrid-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon"
|
||||
version = "3.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
|
||||
dependencies = [
|
||||
"bon-macros",
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bon-macros"
|
||||
version = "3.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
|
||||
dependencies = [
|
||||
"darling 0.23.0",
|
||||
"ident_case",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "8.0.2"
|
||||
@@ -2996,6 +3031,52 @@ dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dial9-tokio-telemetry"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fab5b5b736126e4a4a3ed06e15389ac199c2ac4f72395197addb305e6ba1759"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"bon",
|
||||
"crossbeam-queue",
|
||||
"dial9-trace-format",
|
||||
"flate2",
|
||||
"futures-util",
|
||||
"hostname",
|
||||
"libc",
|
||||
"metrique",
|
||||
"metrique-writer",
|
||||
"pin-project-lite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dial9-trace-format"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80e0ee560b05f09bf817602d57644947e31e83c521d4e0277f723a6e64d44f92"
|
||||
dependencies = [
|
||||
"dial9-trace-format-derive",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dial9-trace-format-derive"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9dbbd8126d4d6613931317cfe2a7275c1cd487e41c961e42456ab5f956570030"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
@@ -3232,6 +3313,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "endian-type"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
|
||||
|
||||
[[package]]
|
||||
name = "enumset"
|
||||
version = "1.1.10"
|
||||
@@ -4124,6 +4211,17 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hostname"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "htmlescape"
|
||||
version = "0.3.1"
|
||||
@@ -5182,6 +5280,131 @@ dependencies = [
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-util"
|
||||
version = "0.20.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdfb1365fea27e6dd9dc1dbc19f570198bc86914533ad639dae939635f096be4"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
"hashbrown 0.16.1",
|
||||
"indexmap 2.13.0",
|
||||
"metrics",
|
||||
"ordered-float 5.1.0",
|
||||
"quanta",
|
||||
"radix_trie",
|
||||
"rand 0.9.2",
|
||||
"rand_xoshiro",
|
||||
"sketches-ddsketch",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique"
|
||||
version = "0.1.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f3e5ecbbefec32dafed0fd98ef23768aaade6de35b8434fc3e44f6346b73cd6"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"jiff",
|
||||
"metrique-core",
|
||||
"metrique-macro",
|
||||
"metrique-service-metrics",
|
||||
"metrique-timesource",
|
||||
"metrique-writer",
|
||||
"metrique-writer-core",
|
||||
"metrique-writer-macro",
|
||||
"ryu",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-core"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad6478374c256ffbb0d2de67b7d93e43ac94e35a083f40bd5f72a9770f6110bb"
|
||||
dependencies = [
|
||||
"itertools 0.14.0",
|
||||
"metrique-writer-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-macro"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83adb8929ae9b2f7a4ec07a04c3af569ffe22f96f02c89063e4a78895d6af760"
|
||||
dependencies = [
|
||||
"Inflector",
|
||||
"darling 0.23.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-service-metrics"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d01f36f47452cd6e33f66fc8185bb32f320aaa5721b6ad7230776442d3cf180"
|
||||
dependencies = [
|
||||
"metrique-writer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-timesource"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c60fb3f2836dffc05146f0dfe7bf2e0789909f3fefd72c729491adaef01acc1a"
|
||||
|
||||
[[package]]
|
||||
name = "metrique-writer"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d9ba4f5a6b5dd821f78315095840e88d244fafbdda3cf1688835cd2a56aec"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"crossbeam-queue",
|
||||
"crossbeam-utils",
|
||||
"metrics",
|
||||
"metrics-util",
|
||||
"metrique-core",
|
||||
"metrique-writer-core",
|
||||
"metrique-writer-macro",
|
||||
"rand 0.9.2",
|
||||
"smallvec",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-writer-core"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "642989d2c349dfcd705a0b6b63887459f71c8b8deb6dc79e39e12eaa17400aba"
|
||||
dependencies = [
|
||||
"derive-where",
|
||||
"itertools 0.14.0",
|
||||
"serde",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrique-writer-macro"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12edafee41e67f90ab2efe2b850e10751f0da3da4aeb61b8eb7e6c31666e8da8"
|
||||
dependencies = [
|
||||
"darling 0.23.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"str_inflector",
|
||||
"syn 2.0.117",
|
||||
"synstructure 0.13.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mimalloc"
|
||||
version = "0.1.48"
|
||||
@@ -5308,6 +5531,15 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nibble_vec"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43"
|
||||
dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.26.4"
|
||||
@@ -5801,6 +6033,15 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "5.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "outref"
|
||||
version = "0.5.2"
|
||||
@@ -6572,6 +6813,21 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quanta"
|
||||
version = "0.12.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"raw-cpuid",
|
||||
"wasi",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
@@ -6680,6 +6936,16 @@ version = "6.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
|
||||
|
||||
[[package]]
|
||||
name = "radix_trie"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd"
|
||||
dependencies = [
|
||||
"endian-type",
|
||||
"nibble_vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
@@ -6757,6 +7023,15 @@ version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba"
|
||||
|
||||
[[package]]
|
||||
name = "rand_xoshiro"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41"
|
||||
dependencies = [
|
||||
"rand_core 0.9.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ratelimit"
|
||||
version = "0.10.1"
|
||||
@@ -6768,6 +7043,15 @@ dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "11.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.11.0"
|
||||
@@ -7760,6 +8044,7 @@ version = "0.0.5"
|
||||
dependencies = [
|
||||
"metrics",
|
||||
"nvml-wrapper",
|
||||
"rustfs-config",
|
||||
"rustfs-ecstore",
|
||||
"rustfs-utils",
|
||||
"sysinfo",
|
||||
@@ -7807,6 +8092,7 @@ dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"dial9-tokio-telemetry",
|
||||
"flate2",
|
||||
"glob",
|
||||
"jiff",
|
||||
@@ -8552,7 +8838,7 @@ version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c"
|
||||
dependencies = [
|
||||
"ordered-float",
|
||||
"ordered-float 2.10.1",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@@ -8851,6 +9137,12 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e"
|
||||
|
||||
[[package]]
|
||||
name = "sketches-ddsketch"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.12"
|
||||
@@ -9090,6 +9382,16 @@ dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "str_inflector"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "str_stack"
|
||||
version = "0.1.0"
|
||||
@@ -9379,7 +9681,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding",
|
||||
"ordered-float",
|
||||
"ordered-float 2.10.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -277,6 +277,7 @@ zstd = "0.13.3"
|
||||
|
||||
# Observability and Metrics
|
||||
metrics = "0.24.3"
|
||||
dial9-tokio-telemetry = "0.2"
|
||||
opentelemetry = { version = "0.31.0" }
|
||||
opentelemetry-appender-tracing = { version = "0.31.1", features = ["experimental_use_tracing_span_context", "experimental_metadata_attributes", "spec_unstable_logs_enabled"] }
|
||||
opentelemetry-otlp = { version = "0.31.1", features = ["gzip-http", "reqwest-rustls"] }
|
||||
|
||||
@@ -27,6 +27,16 @@ pub const ENV_RNG_SEED: &str = "RUSTFS_RUNTIME_RNG_SEED";
|
||||
/// Event polling interval
|
||||
pub const ENV_EVENT_INTERVAL: &str = "RUSTFS_RUNTIME_EVENT_INTERVAL";
|
||||
|
||||
// Dial9 Tokio Telemetry Configuration
|
||||
pub const ENV_RUNTIME_DIAL9_ENABLED: &str = "RUSTFS_RUNTIME_DIAL9_ENABLED";
|
||||
pub const ENV_RUNTIME_DIAL9_OUTPUT_DIR: &str = "RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR";
|
||||
pub const ENV_RUNTIME_DIAL9_FILE_PREFIX: &str = "RUSTFS_RUNTIME_DIAL9_FILE_PREFIX";
|
||||
pub const ENV_RUNTIME_DIAL9_MAX_FILE_SIZE: &str = "RUSTFS_RUNTIME_DIAL9_MAX_FILE_SIZE";
|
||||
pub const ENV_RUNTIME_DIAL9_ROTATION_COUNT: &str = "RUSTFS_RUNTIME_DIAL9_ROTATION_COUNT";
|
||||
pub const ENV_RUNTIME_DIAL9_S3_BUCKET: &str = "RUSTFS_RUNTIME_DIAL9_S3_BUCKET";
|
||||
pub const ENV_RUNTIME_DIAL9_S3_PREFIX: &str = "RUSTFS_RUNTIME_DIAL9_S3_PREFIX";
|
||||
pub const ENV_RUNTIME_DIAL9_SAMPLING_RATE: &str = "RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE";
|
||||
|
||||
// Default values for Tokio runtime
|
||||
pub const DEFAULT_WORKER_THREADS: usize = 16;
|
||||
pub const DEFAULT_MAX_BLOCKING_THREADS: usize = 1024;
|
||||
@@ -40,6 +50,15 @@ pub const DEFAULT_MAX_IO_EVENTS_PER_TICK: usize = 1024;
|
||||
pub const DEFAULT_EVENT_INTERVAL: u32 = 61;
|
||||
pub const DEFAULT_RNG_SEED: Option<u64> = None; // None means random
|
||||
|
||||
// Dial9 Tokio Telemetry Default values
|
||||
pub const DEFAULT_RUNTIME_DIAL9_ENABLED: bool = false; // Disabled by default
|
||||
pub const DEFAULT_RUNTIME_DIAL9_OUTPUT_DIR: &str = "/var/log/rustfs/telemetry";
|
||||
pub const DEFAULT_RUNTIME_DIAL9_FILE_PREFIX: &str = "rustfs-tokio";
|
||||
pub const DEFAULT_RUNTIME_DIAL9_MAX_FILE_SIZE: u64 = 100 * 1024 * 1024; // 100MB
|
||||
pub const DEFAULT_RUNTIME_DIAL9_ROTATION_COUNT: usize = 10;
|
||||
pub const DEFAULT_RUNTIME_DIAL9_SAMPLING_RATE: f64 = 1.0; // 100% sampling
|
||||
// Note: S3 bucket/prefix have no default; absence means upload is disabled (modeled as Option<String>)
|
||||
|
||||
/// Threshold for small object seek support in megabytes.
|
||||
///
|
||||
/// When an object is smaller than this size, rustfs will provide seek support.
|
||||
|
||||
@@ -31,6 +31,7 @@ gpu = ["dep:nvml-wrapper"]
|
||||
full = ["gpu"]
|
||||
|
||||
[dependencies]
|
||||
rustfs-config = { workspace = true }
|
||||
rustfs-ecstore = { workspace = true }
|
||||
rustfs-utils = { workspace = true }
|
||||
metrics = { workspace = true }
|
||||
|
||||
181
crates/metrics/src/collectors/dial9.rs
Normal file
181
crates/metrics/src/collectors/dial9.rs
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! dial9 Tokio runtime telemetry metrics collector.
|
||||
//!
|
||||
//! This module provides metrics for monitoring the health and performance
|
||||
//! of the dial9 telemetry system itself.
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::MetricType;
|
||||
use crate::format::PrometheusMetric;
|
||||
use rustfs_config::{DEFAULT_RUNTIME_DIAL9_ENABLED, ENV_RUNTIME_DIAL9_ENABLED};
|
||||
use rustfs_utils::get_env_bool;
|
||||
|
||||
/// Dial9 telemetry system statistics.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct Dial9Stats {
|
||||
/// Total number of telemetry events recorded
|
||||
pub events_total: u64,
|
||||
|
||||
/// Total bytes written to trace files
|
||||
pub bytes_written: u64,
|
||||
|
||||
/// Number of file rotations that have occurred
|
||||
pub rotation_count: u64,
|
||||
|
||||
/// Total number of dial9 errors
|
||||
pub errors_total: u64,
|
||||
|
||||
/// Estimated CPU overhead percentage (if available)
|
||||
pub cpu_overhead_percent: f64,
|
||||
|
||||
/// Current disk usage by trace files in bytes
|
||||
pub disk_usage_bytes: u64,
|
||||
|
||||
/// Number of active sessions
|
||||
pub active_sessions: u64,
|
||||
}
|
||||
|
||||
/// Collect dial9 telemetry metrics.
|
||||
///
|
||||
/// This function converts dial9 statistics into Prometheus metrics format.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `stats` - Dial9 statistics to report
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A vector of Prometheus metrics for dial9 telemetry statistics.
|
||||
pub fn collect_dial9_metrics(stats: &Dial9Stats) -> Vec<PrometheusMetric> {
|
||||
let enabled = is_dial9_enabled();
|
||||
let enabled_value = if enabled { 1.0 } else { 0.0 };
|
||||
|
||||
let mut metrics = vec![PrometheusMetric::new(
|
||||
"rustfs_dial9_enabled",
|
||||
MetricType::Gauge,
|
||||
"Whether dial9 telemetry is enabled (1) or disabled (0)",
|
||||
enabled_value,
|
||||
)];
|
||||
|
||||
// If dial9 is disabled, return just the enabled flag
|
||||
if !enabled {
|
||||
return metrics;
|
||||
}
|
||||
|
||||
// Add detailed metrics when enabled
|
||||
metrics.extend(vec![
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_events_total",
|
||||
MetricType::Counter,
|
||||
"Total number of Tokio runtime events recorded by dial9",
|
||||
stats.events_total as f64,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_bytes_written_total",
|
||||
MetricType::Counter,
|
||||
"Total bytes written to dial9 trace files",
|
||||
stats.bytes_written as f64,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_rotations_total",
|
||||
MetricType::Counter,
|
||||
"Total number of trace file rotations",
|
||||
stats.rotation_count as f64,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_errors_total",
|
||||
MetricType::Counter,
|
||||
"Total number of dial9 telemetry errors",
|
||||
stats.errors_total as f64,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_cpu_overhead_percent",
|
||||
MetricType::Gauge,
|
||||
"Estimated CPU overhead percentage from dial9 telemetry",
|
||||
stats.cpu_overhead_percent,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_disk_usage_bytes",
|
||||
MetricType::Gauge,
|
||||
"Current disk usage by dial9 trace files",
|
||||
stats.disk_usage_bytes as f64,
|
||||
),
|
||||
PrometheusMetric::new(
|
||||
"rustfs_dial9_active_sessions",
|
||||
MetricType::Gauge,
|
||||
"Number of active dial9 telemetry sessions",
|
||||
stats.active_sessions as f64,
|
||||
),
|
||||
]);
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
/// Check if dial9 telemetry is enabled via environment variable.
|
||||
pub fn is_dial9_enabled() -> bool {
|
||||
get_env_bool(ENV_RUNTIME_DIAL9_ENABLED, DEFAULT_RUNTIME_DIAL9_ENABLED)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_dial9_stats_default() {
|
||||
let stats = Dial9Stats::default();
|
||||
assert_eq!(stats.events_total, 0);
|
||||
assert_eq!(stats.bytes_written, 0);
|
||||
assert_eq!(stats.rotation_count, 0);
|
||||
assert_eq!(stats.errors_total, 0);
|
||||
assert_eq!(stats.cpu_overhead_percent, 0.0);
|
||||
assert_eq!(stats.disk_usage_bytes, 0);
|
||||
assert_eq!(stats.active_sessions, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_collect_dial9_metrics() {
|
||||
let stats = Dial9Stats {
|
||||
events_total: 100,
|
||||
bytes_written: 1024,
|
||||
..Default::default()
|
||||
};
|
||||
let metrics = collect_dial9_metrics(&stats);
|
||||
|
||||
// Should always have at least the enabled flag
|
||||
assert!(!metrics.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_collect_dial9_metrics_with_values() {
|
||||
let stats = Dial9Stats {
|
||||
events_total: 10000,
|
||||
bytes_written: 1024000,
|
||||
rotation_count: 5,
|
||||
errors_total: 0,
|
||||
cpu_overhead_percent: 2.5,
|
||||
disk_usage_bytes: 2048000,
|
||||
active_sessions: 1,
|
||||
};
|
||||
|
||||
let metrics = collect_dial9_metrics(&stats);
|
||||
|
||||
// When dial9 is enabled, should have all metrics
|
||||
// Note: This test assumes dial9 is enabled in the test environment
|
||||
// If disabled, only the enabled flag metric will be present
|
||||
assert!(!metrics.is_empty());
|
||||
}
|
||||
}
|
||||
@@ -70,6 +70,7 @@ mod cluster_erasure_set;
|
||||
mod cluster_health;
|
||||
mod cluster_iam;
|
||||
mod cluster_usage;
|
||||
mod dial9;
|
||||
pub(crate) mod global;
|
||||
mod ilm;
|
||||
mod logger_webhook;
|
||||
@@ -97,6 +98,7 @@ pub use cluster_erasure_set::{ErasureSetStats, collect_erasure_set_metrics};
|
||||
pub use cluster_health::{ClusterHealthStats, collect_cluster_health_metrics};
|
||||
pub use cluster_iam::{IamStats, collect_iam_metrics};
|
||||
pub use cluster_usage::{BucketUsageStats, ClusterUsageStats, collect_bucket_usage_metrics, collect_cluster_usage_metrics};
|
||||
pub use dial9::{Dial9Stats, collect_dial9_metrics, is_dial9_enabled};
|
||||
pub use global::init_metrics_collectors;
|
||||
pub use ilm::{IlmStats, collect_ilm_metrics};
|
||||
pub use logger_webhook::{WebhookTargetStats, collect_webhook_metrics};
|
||||
|
||||
@@ -52,6 +52,7 @@ tracing-error = { workspace = true }
|
||||
tracing-opentelemetry = { workspace = true }
|
||||
tracing-subscriber = { workspace = true, features = ["registry", "std", "fmt", "env-filter", "tracing-log", "time", "local-time", "json"] }
|
||||
tokio = { workspace = true, features = ["sync", "fs", "rt-multi-thread", "rt", "time", "macros"] }
|
||||
dial9-tokio-telemetry = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
zstd = { workspace = true, features = ["zstdmt"] }
|
||||
|
||||
|
||||
53
crates/obs/examples/test_dial9.rs
Normal file
53
crates/obs/examples/test_dial9.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
// Test dial9 integration example
|
||||
use rustfs_obs::dial9::{Dial9Config, is_enabled};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Dial9 Integration Test ===\n");
|
||||
|
||||
// Test 1: Check initial dial9 state
|
||||
println!("Test 1: Default state");
|
||||
let initial_enabled = is_enabled();
|
||||
println!(" dial9 enabled: {}", initial_enabled);
|
||||
if initial_enabled {
|
||||
println!(" ⚠ SKIP: Dial9 is already enabled via environment; skipping default-disabled assertion\n");
|
||||
} else {
|
||||
println!(" ✓ PASS: Dial9 is disabled by default\n");
|
||||
}
|
||||
|
||||
// Test 2: Load default configuration
|
||||
println!("Test 2: Default configuration");
|
||||
let config = Dial9Config::from_env();
|
||||
println!(" enabled: {}", config.enabled);
|
||||
println!(" output_dir: {}", config.output_dir);
|
||||
println!(" file_prefix: {}", config.file_prefix);
|
||||
println!(" max_file_size: {} bytes", config.max_file_size);
|
||||
println!(" rotation_count: {}", config.rotation_count);
|
||||
println!(" s3_bucket: {:?}", config.s3_bucket);
|
||||
println!(" s3_prefix: {:?}", config.s3_prefix);
|
||||
println!(" sampling_rate: {}", config.sampling_rate);
|
||||
println!(" ✓ PASS: Default configuration loaded\n");
|
||||
|
||||
// Test 3: Configuration validation
|
||||
println!("Test 3: Configuration validation");
|
||||
if !initial_enabled {
|
||||
assert!(!config.enabled, "Should be disabled by default");
|
||||
assert_eq!(config.s3_bucket, None, "S3 bucket should be None by default");
|
||||
assert_eq!(config.s3_prefix, None, "S3 prefix should be None by default");
|
||||
println!(" ✓ PASS: Configuration validated\n");
|
||||
} else {
|
||||
println!(" ⚠ SKIP: Configuration validation skipped (dial9 is enabled)\n");
|
||||
}
|
||||
|
||||
println!("=== All Tests Passed! ===");
|
||||
println!();
|
||||
println!("Note: To test with dial9 enabled, set environment variables:");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_ENABLED=true");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR=/tmp/rustfs-test-telemetry");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE=0.5");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_S3_BUCKET=my-bucket");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_S3_PREFIX=telemetry/");
|
||||
println!(" cargo run -p rustfs-obs --example test_dial9");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
76
crates/obs/examples/test_dial9_full.rs
Normal file
76
crates/obs/examples/test_dial9_full.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
// Full dial9 integration test with session initialization
|
||||
use rustfs_obs::dial9::{Dial9Config, init_session, is_enabled};
|
||||
use tokio::time::{Duration, sleep};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Full Dial9 Integration Test ===");
|
||||
println!();
|
||||
|
||||
// Check if dial9 is enabled
|
||||
if !is_enabled() {
|
||||
println!("Dial9 is disabled. Enable with:");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_ENABLED=true");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR=/tmp/rustfs-test-telemetry");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Test 1: Configuration
|
||||
println!("Test 1: Configuration");
|
||||
let config = Dial9Config::from_env();
|
||||
println!(" enabled: {}", config.enabled);
|
||||
println!(" output_dir: {}", config.output_dir);
|
||||
println!(" file_prefix: {}", config.file_prefix);
|
||||
println!(" sampling_rate: {}", config.sampling_rate);
|
||||
println!(" ✓ Configuration loaded");
|
||||
println!();
|
||||
|
||||
// Test 2: Session initialization
|
||||
println!("Test 2: Session initialization");
|
||||
match init_session().await {
|
||||
Ok(Some(guard)) => {
|
||||
println!(" ✓ Session initialized successfully");
|
||||
println!(" guard.is_active(): {}", guard.is_active());
|
||||
println!();
|
||||
|
||||
// Test 3: Generate async activity
|
||||
println!("Test 3: Generate async runtime activity");
|
||||
let tasks = (0..3).map(|i| {
|
||||
tokio::spawn(async move {
|
||||
for j in 0..5 {
|
||||
println!(" Task {} iteration {}", i, j);
|
||||
sleep(Duration::from_millis(20)).await;
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
for task in tasks {
|
||||
task.await?;
|
||||
}
|
||||
println!(" ✓ Async activity completed");
|
||||
println!();
|
||||
|
||||
// Test 4: Session lifecycle
|
||||
println!("Test 4: Session lifecycle");
|
||||
println!(" Dropping guard...");
|
||||
drop(guard);
|
||||
println!(" ✓ Session cleaned up");
|
||||
}
|
||||
Ok(None) => {
|
||||
println!(" ⚠ Session not created (writer may have failed)");
|
||||
println!(" This is expected if output directory cannot be created");
|
||||
}
|
||||
Err(e) => {
|
||||
println!(" ✗ Session init failed: {:?}", e);
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
println!("=== Test Summary ===");
|
||||
println!("✓ Configuration: PASS");
|
||||
println!("✓ Session Init: PASS");
|
||||
println!("✓ Async Activity: PASS");
|
||||
println!("✓ Lifecycle: PASS");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
63
crates/obs/examples/test_dial9_s3.rs
Normal file
63
crates/obs/examples/test_dial9_s3.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
// Test dial9 S3 configuration
|
||||
use rustfs_obs::dial9::{Dial9Config, is_enabled};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Dial9 S3 Configuration Test ===");
|
||||
println!();
|
||||
|
||||
// Test 1: Default S3 configuration (should be None/None)
|
||||
println!("Test 1: Default S3 configuration");
|
||||
let default_config = Dial9Config::default();
|
||||
println!(" s3_bucket: {:?}", default_config.s3_bucket);
|
||||
println!(" s3_prefix: {:?}", default_config.s3_prefix);
|
||||
assert_eq!(default_config.s3_bucket, None);
|
||||
assert_eq!(default_config.s3_prefix, None);
|
||||
println!(" ✓ PASS: Default S3 config is None/None");
|
||||
println!();
|
||||
|
||||
// Test 2: Check if dial9 is enabled
|
||||
println!("Test 2: Check dial9 enabled state");
|
||||
println!(" is_enabled(): {}", is_enabled());
|
||||
println!(
|
||||
" RUSTFS_RUNTIME_DIAL9_ENABLED: {}",
|
||||
std::env::var("RUSTFS_RUNTIME_DIAL9_ENABLED").unwrap_or("not set".to_string())
|
||||
);
|
||||
println!();
|
||||
|
||||
// Test 3: Load configuration from environment
|
||||
println!("Test 3: Load configuration from environment");
|
||||
let config = Dial9Config::from_env();
|
||||
println!(" enabled: {}", config.enabled);
|
||||
println!(" s3_bucket: {:?}", config.s3_bucket);
|
||||
println!(" s3_prefix: {:?}", config.s3_prefix);
|
||||
println!(" ✓ PASS: Configuration loaded");
|
||||
println!();
|
||||
|
||||
// Only test S3 config if dial9 is enabled
|
||||
if !config.enabled {
|
||||
println!(" ⚠ SKIP: Dial9 is disabled, S3 config not loaded");
|
||||
println!(" To test S3 configuration:");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_ENABLED=true");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_S3_BUCKET=my-bucket");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_S3_PREFIX=telemetry/");
|
||||
println!(" cargo run -p rustfs-obs --example test_dial9_s3");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Test 4: Configuration summary
|
||||
println!("Test 4: Configuration summary");
|
||||
println!(" S3 upload enabled: {}", config.s3_bucket.is_some());
|
||||
if let Some(bucket) = &config.s3_bucket {
|
||||
println!(" S3 bucket: {}", bucket);
|
||||
}
|
||||
if let Some(prefix) = &config.s3_prefix {
|
||||
println!(" S3 prefix: {}", prefix);
|
||||
}
|
||||
println!(" ✓ PASS: Configuration summary displayed");
|
||||
println!();
|
||||
|
||||
println!("=== All Tests Passed! ===");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
45
crates/obs/examples/test_dial9_simple.rs
Normal file
45
crates/obs/examples/test_dial9_simple.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
// Simple dial9 integration test (reads from environment)
|
||||
use rustfs_obs::dial9::{Dial9Config, is_enabled};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Dial9 Integration Test ===");
|
||||
println!();
|
||||
|
||||
// Test 1: Check current state
|
||||
println!("Test 1: Check dial9 state");
|
||||
println!(
|
||||
" RUSTFS_RUNTIME_DIAL9_ENABLED: {}",
|
||||
std::env::var("RUSTFS_RUNTIME_DIAL9_ENABLED").unwrap_or("not set".to_string())
|
||||
);
|
||||
println!(" is_enabled(): {}", is_enabled());
|
||||
println!(" ✓ Dial9 state check complete");
|
||||
println!();
|
||||
|
||||
// Test 2: Load configuration
|
||||
println!("Test 2: Load dial9 configuration");
|
||||
let config = Dial9Config::from_env();
|
||||
println!(" enabled: {}", config.enabled);
|
||||
println!(" output_dir: {}", config.output_dir);
|
||||
println!(" file_prefix: {}", config.file_prefix);
|
||||
println!(" max_file_size: {} bytes", config.max_file_size);
|
||||
println!(" rotation_count: {}", config.rotation_count);
|
||||
println!(" sampling_rate: {}", config.sampling_rate);
|
||||
println!(" ✓ Configuration loaded");
|
||||
println!();
|
||||
|
||||
// Test 3: Test base path calculation
|
||||
println!("Test 3: Base path calculation");
|
||||
println!(" base_path: {:?}", config.base_path());
|
||||
println!(" ✓ Base path calculated");
|
||||
println!();
|
||||
|
||||
println!("=== All Tests Passed! ===");
|
||||
println!();
|
||||
println!("Note: To test full dial9 functionality, enable it with:");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_ENABLED=true");
|
||||
println!(" export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR=/tmp/rustfs-telemetry");
|
||||
println!(" cargo run -p rustfs-obs --example test_dial9_simple");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -22,6 +22,7 @@
|
||||
//! - Logging with tracing
|
||||
//! - Metrics collection
|
||||
//! - Distributed tracing
|
||||
//! - Tokio runtime telemetry (via dial9)
|
||||
//!
|
||||
//! ## Usage
|
||||
//!
|
||||
@@ -69,3 +70,7 @@ pub use config::*;
|
||||
pub use error::*;
|
||||
pub use global::*;
|
||||
pub use telemetry::{OtelGuard, Recorder};
|
||||
|
||||
// Dial9 Tokio runtime telemetry
|
||||
// Re-export dial9 types at crate root level for easier access
|
||||
pub use telemetry::dial9;
|
||||
|
||||
289
crates/obs/src/telemetry/dial9.rs
Normal file
289
crates/obs/src/telemetry/dial9.rs
Normal file
@@ -0,0 +1,289 @@
|
||||
// Copyright 2024 RustFS Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! dial9-tokio-telemetry integration for RustFS.
|
||||
//!
|
||||
//! This module provides low-overhead Tokio runtime-level telemetry,
|
||||
//! capturing events like PollStart/End, WorkerPark/Unpark, QueueSample, etc.
|
||||
|
||||
use crate::TelemetryError;
|
||||
// Import and re-export TelemetryGuard for use in other crates (like rustfs)
|
||||
// Use as Dial9TelemetryGuard internally to avoid naming conflicts
|
||||
use dial9_tokio_telemetry::telemetry::RotatingWriter;
|
||||
pub use dial9_tokio_telemetry::telemetry::TelemetryGuard;
|
||||
use dial9_tokio_telemetry::telemetry::TelemetryGuard as Dial9TelemetryGuard;
|
||||
// Use rustfs_config which re-exports runtime constants
|
||||
use rustfs_config::{
|
||||
DEFAULT_RUNTIME_DIAL9_ENABLED, DEFAULT_RUNTIME_DIAL9_FILE_PREFIX, DEFAULT_RUNTIME_DIAL9_MAX_FILE_SIZE,
|
||||
DEFAULT_RUNTIME_DIAL9_OUTPUT_DIR, DEFAULT_RUNTIME_DIAL9_ROTATION_COUNT, DEFAULT_RUNTIME_DIAL9_SAMPLING_RATE,
|
||||
ENV_RUNTIME_DIAL9_ENABLED, ENV_RUNTIME_DIAL9_FILE_PREFIX, ENV_RUNTIME_DIAL9_MAX_FILE_SIZE, ENV_RUNTIME_DIAL9_OUTPUT_DIR,
|
||||
ENV_RUNTIME_DIAL9_ROTATION_COUNT, ENV_RUNTIME_DIAL9_S3_BUCKET, ENV_RUNTIME_DIAL9_S3_PREFIX, ENV_RUNTIME_DIAL9_SAMPLING_RATE,
|
||||
};
|
||||
use rustfs_utils::get_env_bool;
|
||||
use rustfs_utils::get_env_f64;
|
||||
use rustfs_utils::get_env_opt_str;
|
||||
use rustfs_utils::get_env_str;
|
||||
use rustfs_utils::get_env_u64;
|
||||
use rustfs_utils::get_env_usize;
|
||||
use std::path::PathBuf;
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Configuration for dial9 Tokio telemetry.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Dial9Config {
|
||||
/// Whether dial9 telemetry is enabled
|
||||
pub enabled: bool,
|
||||
|
||||
/// Directory where trace files are written
|
||||
pub output_dir: String,
|
||||
|
||||
/// Prefix for trace file names
|
||||
pub file_prefix: String,
|
||||
|
||||
/// Maximum size of each trace file in bytes
|
||||
pub max_file_size: u64,
|
||||
|
||||
/// Number of rotated files to keep
|
||||
pub rotation_count: usize,
|
||||
|
||||
/// Optional S3 bucket for uploading trace files
|
||||
pub s3_bucket: Option<String>,
|
||||
|
||||
/// Optional S3 prefix for uploaded files
|
||||
pub s3_prefix: Option<String>,
|
||||
|
||||
/// Sampling rate (0.0 to 1.0)
|
||||
pub sampling_rate: f64,
|
||||
}
|
||||
|
||||
impl Default for Dial9Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: DEFAULT_RUNTIME_DIAL9_ENABLED,
|
||||
output_dir: DEFAULT_RUNTIME_DIAL9_OUTPUT_DIR.to_string(),
|
||||
file_prefix: DEFAULT_RUNTIME_DIAL9_FILE_PREFIX.to_string(),
|
||||
max_file_size: DEFAULT_RUNTIME_DIAL9_MAX_FILE_SIZE,
|
||||
rotation_count: DEFAULT_RUNTIME_DIAL9_ROTATION_COUNT,
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
sampling_rate: DEFAULT_RUNTIME_DIAL9_SAMPLING_RATE,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Dial9Config {
|
||||
/// Create configuration from environment variables.
|
||||
pub fn from_env() -> Self {
|
||||
let enabled = get_env_bool(ENV_RUNTIME_DIAL9_ENABLED, DEFAULT_RUNTIME_DIAL9_ENABLED);
|
||||
|
||||
if !enabled {
|
||||
return Self::default();
|
||||
}
|
||||
|
||||
Self {
|
||||
enabled,
|
||||
output_dir: get_env_str(ENV_RUNTIME_DIAL9_OUTPUT_DIR, DEFAULT_RUNTIME_DIAL9_OUTPUT_DIR),
|
||||
file_prefix: get_env_str(ENV_RUNTIME_DIAL9_FILE_PREFIX, DEFAULT_RUNTIME_DIAL9_FILE_PREFIX),
|
||||
max_file_size: get_env_u64(ENV_RUNTIME_DIAL9_MAX_FILE_SIZE, DEFAULT_RUNTIME_DIAL9_MAX_FILE_SIZE),
|
||||
rotation_count: get_env_usize(ENV_RUNTIME_DIAL9_ROTATION_COUNT, DEFAULT_RUNTIME_DIAL9_ROTATION_COUNT),
|
||||
s3_bucket: get_env_opt_str(ENV_RUNTIME_DIAL9_S3_BUCKET).filter(|s| !s.is_empty()),
|
||||
s3_prefix: get_env_opt_str(ENV_RUNTIME_DIAL9_S3_PREFIX).filter(|s| !s.is_empty()),
|
||||
sampling_rate: get_env_f64(ENV_RUNTIME_DIAL9_SAMPLING_RATE, DEFAULT_RUNTIME_DIAL9_SAMPLING_RATE).clamp(0.0, 1.0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the base path for trace files.
|
||||
pub fn base_path(&self) -> PathBuf {
|
||||
PathBuf::from(&self.output_dir).join(&self.file_prefix)
|
||||
}
|
||||
}
|
||||
|
||||
/// Guard for dial9 telemetry session.
|
||||
///
|
||||
/// When dropped, this guard will flush any remaining telemetry data.
|
||||
/// Keep it alive for the duration of your application.
|
||||
pub struct Dial9SessionGuard {
|
||||
/// The underlying dial9 telemetry guard (if enabled)
|
||||
_guard: Option<Dial9TelemetryGuard>,
|
||||
/// Configuration
|
||||
#[allow(dead_code)]
|
||||
config: Dial9Config,
|
||||
}
|
||||
|
||||
impl Dial9SessionGuard {
|
||||
/// Create a new dial9 session guard.
|
||||
///
|
||||
/// Note: This only validates configuration and creates the output directory.
|
||||
/// The actual telemetry session is created when building the Tokio runtime
|
||||
/// via `build_traced_runtime()`.
|
||||
///
|
||||
/// Returns `Ok(None)` if dial9 is disabled.
|
||||
pub async fn new(config: Dial9Config) -> Result<Option<Self>, TelemetryError> {
|
||||
if !config.enabled {
|
||||
info!("Dial9 telemetry disabled");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
info!(
|
||||
output_dir = %config.output_dir,
|
||||
file_prefix = %config.file_prefix,
|
||||
sampling_rate = config.sampling_rate,
|
||||
"Validating dial9 telemetry configuration"
|
||||
);
|
||||
|
||||
// Only create directory; writer will be created in build_traced_runtime
|
||||
if let Err(e) = tokio::fs::create_dir_all(&config.output_dir).await {
|
||||
warn!("Failed to create dial9 output directory '{}': {}", config.output_dir, e);
|
||||
warn!("Continuing without dial9 telemetry");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
info!("Dial9 telemetry configuration validated successfully");
|
||||
|
||||
Ok(Some(Self { _guard: None, config }))
|
||||
}
|
||||
|
||||
/// Set the telemetry guard (called after runtime creation)
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn set_guard(&mut self, guard: Dial9TelemetryGuard) {
|
||||
self._guard = Some(guard);
|
||||
}
|
||||
|
||||
/// Check if this guard has an active session.
|
||||
pub fn is_active(&self) -> bool {
|
||||
self._guard.is_some()
|
||||
}
|
||||
|
||||
/// Flush any pending telemetry data.
|
||||
pub async fn shutdown(&self) {
|
||||
if let Some(_guard) = &self._guard {
|
||||
info!("Dial9 telemetry data will be flushed on drop");
|
||||
// TelemetryGuard handles flushing automatically when dropped
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Dial9SessionGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Some(_guard) = &self._guard {
|
||||
// TelemetryGuard flushes automatically when dropped
|
||||
info!("Dial9 telemetry guard dropped, data flushed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize dial9 telemetry session from environment configuration.
|
||||
///
|
||||
/// This function reads configuration from environment variables and creates
|
||||
/// a dial9 session guard if enabled. The guard should be kept alive for the
|
||||
/// duration of the application.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// - `Ok(Some(guard))` - Dial9 is enabled and session initialized
|
||||
/// - `Ok(None)` - Dial9 is disabled or failed to initialize (non-fatal)
|
||||
/// - `Err(e)` - Fatal error (should not happen with current implementation)
|
||||
pub async fn init_session() -> Result<Option<Dial9SessionGuard>, TelemetryError> {
|
||||
let config = Dial9Config::from_env();
|
||||
Dial9SessionGuard::new(config).await
|
||||
}
|
||||
|
||||
/// Check if dial9 telemetry is enabled via environment configuration.
|
||||
pub fn is_enabled() -> bool {
|
||||
get_env_bool(ENV_RUNTIME_DIAL9_ENABLED, DEFAULT_RUNTIME_DIAL9_ENABLED)
|
||||
}
|
||||
|
||||
/// Build a Tokio runtime with dial9 telemetry enabled.
|
||||
///
|
||||
/// This function creates a Tokio runtime with dial9 telemetry integrated
|
||||
/// if enabled via environment variables. Returns a tuple of (Runtime, TelemetryGuard).
|
||||
///
|
||||
/// This is internal API used by the runtime builder.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `builder` - The configured Tokio runtime builder
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Ok((runtime, guard))` - Successfully created runtime with telemetry
|
||||
/// * `Err` - If runtime creation fails or dial9 is enabled but fails to initialize
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if:
|
||||
/// - Dial9 is enabled but the runtime builder fails
|
||||
/// - Dial9 is enabled but writer creation fails
|
||||
pub fn build_traced_runtime(
|
||||
builder: tokio::runtime::Builder,
|
||||
) -> Result<(tokio::runtime::Runtime, Dial9TelemetryGuard), TelemetryError> {
|
||||
if !is_enabled() {
|
||||
return Err(TelemetryError::Io("Dial9 is not enabled".to_string()));
|
||||
}
|
||||
|
||||
let config = Dial9Config::from_env();
|
||||
|
||||
// Ensure the output directory exists before creating the writer
|
||||
std::fs::create_dir_all(&config.output_dir)
|
||||
.map_err(|e| TelemetryError::Io(format!("Failed to create dial9 output directory '{}': {}", config.output_dir, e)))?;
|
||||
|
||||
// Create rotating writer (synchronous for runtime building)
|
||||
let base_path = config.base_path();
|
||||
let writer = RotatingWriter::new(base_path, config.max_file_size, config.max_file_size * config.rotation_count as u64)
|
||||
.map_err(|e| TelemetryError::Io(format!("Failed to create RotatingWriter: {}", e)))?;
|
||||
|
||||
// Build traced runtime
|
||||
// Note: sampling_rate and S3 upload settings are reserved for future use
|
||||
// once the dial9 library provides support for those configuration options.
|
||||
dial9_tokio_telemetry::telemetry::TracedRuntime::builder()
|
||||
.with_task_tracking(true)
|
||||
.build(builder, writer)
|
||||
.map_err(|e| TelemetryError::Io(format!("Failed to build TracedRuntime: {}", e)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_dial9_config_default() {
|
||||
let config = Dial9Config::default();
|
||||
assert!(!config.enabled);
|
||||
assert_eq!(config.output_dir, DEFAULT_RUNTIME_DIAL9_OUTPUT_DIR);
|
||||
assert_eq!(config.file_prefix, DEFAULT_RUNTIME_DIAL9_FILE_PREFIX);
|
||||
assert_eq!(config.max_file_size, DEFAULT_RUNTIME_DIAL9_MAX_FILE_SIZE);
|
||||
assert_eq!(config.rotation_count, DEFAULT_RUNTIME_DIAL9_ROTATION_COUNT);
|
||||
assert_eq!(config.sampling_rate, DEFAULT_RUNTIME_DIAL9_SAMPLING_RATE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dial9_config_base_path() {
|
||||
let config = Dial9Config {
|
||||
output_dir: "/tmp/telemetry".to_string(),
|
||||
file_prefix: "rustfs".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(config.base_path(), PathBuf::from("/tmp/telemetry/rustfs"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_enabled_default() {
|
||||
// Skip if environment variable is explicitly set
|
||||
if std::env::var(ENV_RUNTIME_DIAL9_ENABLED).is_ok() {
|
||||
println!("Skipping test: RUSTFS_RUNTIME_DIAL9_ENABLED is set");
|
||||
return;
|
||||
}
|
||||
assert!(!is_enabled());
|
||||
}
|
||||
}
|
||||
@@ -39,6 +39,8 @@
|
||||
//! initialised together with an optional stdout mirror.
|
||||
//! 3. **Stdout only** — default fallback; no file I/O, no remote export.
|
||||
|
||||
// Dial9 module - public types are re-exported at crate level
|
||||
pub mod dial9;
|
||||
mod filter;
|
||||
mod guard;
|
||||
mod local;
|
||||
|
||||
76
examples/test_dial9.rs
Normal file
76
examples/test_dial9.rs
Normal file
@@ -0,0 +1,76 @@
|
||||
// Test dial9 integration
|
||||
use rustfs_obs::dial9::{init_session, is_enabled, Dial9Config};
|
||||
use tokio::time::{sleep, Duration};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("=== Dial9 Integration Test ===\n");
|
||||
|
||||
// Test 1: Check initial dial9 state
|
||||
println!("Test 1: Default state");
|
||||
let initial_enabled = is_enabled();
|
||||
println!(" dial9 enabled: {}", initial_enabled);
|
||||
if initial_enabled {
|
||||
println!(" ⚠ SKIP: Dial9 is already enabled via environment; skipping default-disabled assertion\n");
|
||||
} else {
|
||||
println!(" ✓ PASS: Dial9 is disabled by default\n");
|
||||
}
|
||||
|
||||
// Test 2: Enable dial9 via environment variable
|
||||
println!("Test 2: Enable dial9 via environment");
|
||||
std::env::set_var("RUSTFS_RUNTIME_DIAL9_ENABLED", "true");
|
||||
std::env::set_var("RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR", "/tmp/rustfs-test-telemetry");
|
||||
std::env::set_var("RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE", "0.5");
|
||||
|
||||
let config = Dial9Config::from_env();
|
||||
println!(" config.enabled: {}", config.enabled);
|
||||
println!(" config.output_dir: {}", config.output_dir);
|
||||
println!(" config.file_prefix: {}", config.file_prefix);
|
||||
println!(" config.sampling_rate: {}", config.sampling_rate);
|
||||
|
||||
assert!(config.enabled);
|
||||
assert_eq!(config.output_dir, "/tmp/rustfs-test-telemetry");
|
||||
assert_eq!(config.sampling_rate, 0.5);
|
||||
println!(" ✓ PASS: Configuration loaded correctly\n");
|
||||
|
||||
// Test 3: Initialize dial9 session
|
||||
println!("Test 3: Initialize dial9 session");
|
||||
match init_session().await {
|
||||
Ok(Some(guard)) => {
|
||||
println!(" Dial9 session initialized successfully");
|
||||
println!(" guard.is_active(): {}", guard.is_active());
|
||||
println!(" ✓ PASS: Session initialized\n");
|
||||
|
||||
// Test 4: Generate some async activity
|
||||
println!("Test 4: Generate async activity for tracing");
|
||||
let handle = tokio::spawn(async {
|
||||
for i in 1..=5 {
|
||||
println!(" Task iteration {}", i);
|
||||
sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
});
|
||||
handle.await?;
|
||||
println!(" ✓ PASS: Async activity completed\n");
|
||||
|
||||
// Test 5: Session shutdown
|
||||
println!("Test 5: Session cleanup");
|
||||
drop(guard);
|
||||
println!(" ✓ PASS: Session cleaned up\n");
|
||||
}
|
||||
Ok(None) => {
|
||||
println!(" ⚠ SKIP: Dial9 session not created (writer init may have failed)\n");
|
||||
}
|
||||
Err(e) => {
|
||||
println!(" ✗ FAIL: {:?}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
std::env::remove_var("RUSTFS_RUNTIME_DIAL9_ENABLED");
|
||||
std::env::remove_var("RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR");
|
||||
std::env::remove_var("RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE");
|
||||
|
||||
println!("=== All Tests Passed! ===");
|
||||
Ok(())
|
||||
}
|
||||
@@ -85,6 +85,7 @@
|
||||
|
||||
# Set environment variables for build
|
||||
PROTOC = "${pkgs.protobuf}/bin/protoc";
|
||||
RUSTFLAGS = "--cfg tokio_unstable";
|
||||
|
||||
doCheck = false;
|
||||
|
||||
@@ -122,6 +123,7 @@
|
||||
];
|
||||
|
||||
PROTOC = "${pkgs.protobuf}/bin/protoc";
|
||||
RUSTFLAGS = "--cfg tokio_unstable";
|
||||
};
|
||||
}
|
||||
);
|
||||
|
||||
@@ -107,9 +107,8 @@ fn main() {
|
||||
eprintln!("[WARN] Failed to bootstrap external-prefix compatibility: {err}");
|
||||
}
|
||||
|
||||
let runtime = server::tokio_runtime_builder()
|
||||
.build()
|
||||
.expect("Failed to build Tokio runtime");
|
||||
// Build Tokio runtime with optional dial9 telemetry support
|
||||
let runtime = server::build_tokio_runtime().expect("Failed to build Tokio runtime");
|
||||
let result = runtime.block_on(async_main());
|
||||
if let Err(ref e) = result {
|
||||
// Use eprintln as tracing may not be initialized at this point
|
||||
@@ -202,6 +201,15 @@ async fn async_main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
// Check dial9 Tokio runtime telemetry status
|
||||
// Note: The actual telemetry session is created in build_tokio_runtime()
|
||||
// which stores the TelemetryGuard globally for the program duration.
|
||||
if rustfs_obs::dial9::is_enabled() {
|
||||
info!(target: "rustfs::main", "Dial9 Tokio telemetry is configured as enabled; runtime guard was installed during startup.");
|
||||
} else {
|
||||
info!(target: "rustfs::main", "Dial9 Tokio telemetry is not configured (set RUSTFS_RUNTIME_DIAL9_ENABLED=true to enable).");
|
||||
}
|
||||
|
||||
info!("license status: {}", license_status());
|
||||
if let Some(token) = current_license() {
|
||||
info!("runtime license loaded: {}", token.name);
|
||||
|
||||
@@ -31,7 +31,7 @@ pub(crate) use event::{init_event_notifier, shutdown_event_notifier};
|
||||
pub(crate) use http::start_http_server;
|
||||
pub(crate) use prefix::*;
|
||||
pub(crate) use readiness::ReadinessGateLayer;
|
||||
pub(crate) use runtime::tokio_runtime_builder;
|
||||
pub(crate) use runtime::build_tokio_runtime;
|
||||
pub(crate) use service_state::SHUTDOWN_TIMEOUT;
|
||||
pub(crate) use service_state::ServiceState;
|
||||
pub(crate) use service_state::ServiceStateManager;
|
||||
|
||||
@@ -12,9 +12,16 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Duration;
|
||||
use sysinfo::{RefreshKind, System};
|
||||
|
||||
// Import TelemetryGuard from rustfs_obs re-export
|
||||
use rustfs_obs::dial9::TelemetryGuard;
|
||||
|
||||
// Global storage for TelemetryGuard to keep it alive for the program duration
|
||||
static DIAL9_TELEMETRY_GUARD: OnceLock<TelemetryGuard> = OnceLock::new();
|
||||
|
||||
#[inline]
|
||||
fn compute_default_thread_stack_size() -> usize {
|
||||
// Baseline: Release 1 MiB,Debug 2 MiB;macOS at least 2 MiB
|
||||
@@ -80,9 +87,9 @@ fn compute_default_max_blocking_threads() -> usize {
|
||||
/// Panics if environment variable values are invalid
|
||||
/// # Examples
|
||||
/// ```no_run
|
||||
/// use rustfs_server::tokio_runtime_builder;
|
||||
/// let builder = tokio_runtime_builder();
|
||||
/// let runtime = builder.build().unwrap();
|
||||
/// // tokio_runtime_builder is pub(crate) - call it from within the rustfs binary:
|
||||
/// // let builder = tokio_runtime_builder();
|
||||
/// // let runtime = builder.build().unwrap();
|
||||
/// ```
|
||||
pub(crate) fn tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
let mut builder = tokio::runtime::Builder::new_multi_thread();
|
||||
@@ -158,3 +165,78 @@ pub(crate) fn tokio_runtime_builder() -> tokio::runtime::Builder {
|
||||
fn print_tokio_thread_enable() -> bool {
|
||||
rustfs_utils::get_env_bool(rustfs_config::ENV_THREAD_PRINT_ENABLED, rustfs_config::DEFAULT_THREAD_PRINT_ENABLED)
|
||||
}
|
||||
|
||||
/// Build Tokio runtime with optional dial9 telemetry support.
|
||||
///
|
||||
/// If dial9 is enabled via environment variables, creates a TracedRuntime
|
||||
/// and stores the TelemetryGuard globally to keep it alive for the
|
||||
/// duration of the program.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Ok(runtime)` - Successfully created runtime
|
||||
/// * `Err(e)` - Failed to create runtime
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if:
|
||||
/// - The Tokio runtime builder fails
|
||||
/// - Dial9 is enabled but fails to initialize (falls back to standard runtime)
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// // build_tokio_runtime is pub(crate) - call it from within the rustfs binary:
|
||||
/// // let runtime = build_tokio_runtime().expect("Failed to build runtime");
|
||||
/// // runtime.block_on(async { /* ... */ })
|
||||
/// ```
|
||||
pub(crate) fn build_tokio_runtime() -> Result<tokio::runtime::Runtime, BuildError> {
|
||||
let mut builder = tokio_runtime_builder();
|
||||
|
||||
// Check if dial9 is enabled
|
||||
if rustfs_obs::dial9::is_enabled() {
|
||||
tracing::info!("Dial9 telemetry enabled, building TracedRuntime");
|
||||
|
||||
return match rustfs_obs::dial9::build_traced_runtime(builder) {
|
||||
Ok((runtime, guard)) => {
|
||||
// Store guard in global static to keep it alive for the program duration
|
||||
let _ = DIAL9_TELEMETRY_GUARD.set(guard);
|
||||
tracing::info!("TracedRuntime created successfully, guard stored globally");
|
||||
Ok(runtime)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to build TracedRuntime: {}", e);
|
||||
tracing::warn!("Falling back to standard Tokio runtime");
|
||||
// Rebuild the builder for standard runtime
|
||||
let mut builder = tokio_runtime_builder();
|
||||
builder.build().map_err(BuildError::Runtime)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Standard runtime
|
||||
builder.build().map_err(BuildError::Runtime)
|
||||
}
|
||||
|
||||
/// Error type for runtime building failures.
|
||||
#[derive(Debug)]
|
||||
pub enum BuildError {
|
||||
/// Tokio runtime creation failed
|
||||
Runtime(std::io::Error),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BuildError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
BuildError::Runtime(e) => write!(f, "Failed to build Tokio runtime: {}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for BuildError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
match self {
|
||||
BuildError::Runtime(e) => Some(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +89,61 @@ export RUSTFS_RUNTIME_THREAD_STACK_SIZE=1024*1024
|
||||
export RUSTFS_RUNTIME_THREAD_KEEP_ALIVE=60
|
||||
export RUSTFS_RUNTIME_GLOBAL_QUEUE_INTERVAL=31
|
||||
|
||||
# ============================================================================
|
||||
# dial9 Tokio Runtime Telemetry Configuration
|
||||
# ============================================================================
|
||||
# dial9 provides low-overhead Tokio runtime-level telemetry for performance diagnostics.
|
||||
# It captures events like PollStart/End, WorkerPark/Unpark, QueueSample, TaskSpawn.
|
||||
#
|
||||
# Features:
|
||||
# - CPU overhead < 5% (with sampling rate 1.0)
|
||||
# - Automatic file rotation (configurable size and count)
|
||||
# - Graceful degradation if initialization fails
|
||||
#
|
||||
# Note: Disabled by default. Enable only when needed for runtime diagnostics.
|
||||
# Note: Requires build flag --cfg tokio_unstable (set in .cargo/config.toml).
|
||||
|
||||
# Enable dial9 telemetry (default: false)
|
||||
#export RUSTFS_RUNTIME_DIAL9_ENABLED=true
|
||||
|
||||
# Output directory for trace files (default: /var/log/rustfs/telemetry)
|
||||
#export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR="$current_dir/deploy/telemetry"
|
||||
|
||||
# Trace file prefix (default: rustfs-tokio)
|
||||
#export RUSTFS_RUNTIME_DIAL9_FILE_PREFIX=rustfs-tokio
|
||||
|
||||
# Maximum trace file size in bytes (default: 104857600 = 100MB)
|
||||
#export RUSTFS_RUNTIME_DIAL9_MAX_FILE_SIZE=104857600
|
||||
|
||||
# Number of rotated files to keep (default: 10)
|
||||
#export RUSTFS_RUNTIME_DIAL9_ROTATION_COUNT=10
|
||||
|
||||
# Sampling rate: 0.0 to 1.0 (default: 1.0 = 100% sampling)
|
||||
# Lower values reduce CPU overhead. Recommended: 0.1-0.5 for production.
|
||||
#export RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE=1.0
|
||||
|
||||
# S3 upload settings (not yet implemented; reserved for future use):
|
||||
#export RUSTFS_RUNTIME_DIAL9_S3_BUCKET=my-trace-bucket
|
||||
#export RUSTFS_RUNTIME_DIAL9_S3_PREFIX=telemetry/
|
||||
|
||||
# --- Scenario 1: Development / Debugging ---
|
||||
# Full tracing with local storage, high sampling rate
|
||||
#export RUSTFS_RUNTIME_DIAL9_ENABLED=true
|
||||
#export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR="$current_dir/deploy/telemetry"
|
||||
#export RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE=1.0
|
||||
|
||||
# --- Scenario 2: Production Diagnostics ---
|
||||
# Reduced sampling rate to minimize overhead
|
||||
#export RUSTFS_RUNTIME_DIAL9_ENABLED=true
|
||||
#export RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE=0.1
|
||||
|
||||
# --- Scenario 3: Performance Investigation ---
|
||||
# Short-term tracing with high detail, manual cleanup
|
||||
#export RUSTFS_RUNTIME_DIAL9_ENABLED=true
|
||||
#export RUSTFS_RUNTIME_DIAL9_OUTPUT_DIR=/tmp/rustfs-telemetry-investigation
|
||||
#export RUSTFS_RUNTIME_DIAL9_SAMPLING_RATE=1.0
|
||||
#export RUSTFS_RUNTIME_DIAL9_ROTATION_COUNT=3
|
||||
|
||||
export OTEL_INSTRUMENTATION_NAME="rustfs"
|
||||
export OTEL_INSTRUMENTATION_VERSION="0.1.1"
|
||||
export OTEL_INSTRUMENTATION_SCHEMA_URL="https://opentelemetry.io/schemas/1.31.0"
|
||||
|
||||
Reference in New Issue
Block a user