Skip to content

Commit f7995ce

Browse files
committed
feat: add disk stats collector
This allows collection of I/O stats (bytes read/written) for all disks
1 parent 5041290 commit f7995ce

File tree

6 files changed

+205
-2
lines changed

6 files changed

+205
-2
lines changed

examples/config.kdl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,7 @@ metrics {
1212
mountpoints "/"
1313
}
1414
pressure enabled=#true
15+
disk_stats enabled=#true {
16+
mountpoints "/"
17+
}
1518
}

src/collector.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ use smol::lock::RwLock;
1212

1313
use crate::config::UserConfig;
1414
use crate::metrics::collector::{
15-
CpuStatsCollector, FilesystemStatsCollector, MemoryStatsCollector, NetworkStatsCollector,
16-
NodeInfoCollector, PressureCollector, SystemdUnitStateCollector,
15+
CpuStatsCollector, DiskStatsCollector, FilesystemStatsCollector, MemoryStatsCollector, NetworkStatsCollector, NodeInfoCollector, PressureCollector, SystemdUnitStateCollector
1716
};
1817
use crate::metrics::Metric;
1918

@@ -99,6 +98,13 @@ impl Collector {
9998
inner.metrics.push(collector);
10099
}
101100

101+
if metrics.disk_stats.enabled {
102+
let mountpoints = metrics.disk_stats.mountpoints.join(",");
103+
let options = HashMap::from([("mountpoints".to_owned(), mountpoints)]);
104+
let collector = Box::new(DiskStatsCollector::new(options)?);
105+
inner.metrics.push(collector);
106+
}
107+
102108
Ok(())
103109
}
104110

src/config.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub struct MetricsConfig {
2626
pub network_throughput: NetworkThroughputConfig,
2727
pub disk_usage: DiskUsageConfig,
2828
pub pressure: PressureConfig,
29+
pub disk_stats: DiskStatConfig,
2930
}
3031

3132
#[derive(Debug)]
@@ -67,6 +68,12 @@ pub struct PressureConfig {
6768
pub enabled: bool,
6869
}
6970

71+
/// Settings for the disk I/O statistics collector, read from the `disk_stats` config node.
#[derive(Debug)]
72+
pub struct DiskStatConfig {
73+
/// Whether the disk stats collector is created at all.
pub enabled: bool,
74+
/// Mountpoints whose disks should be reported; joined with `,` and handed to the collector.
pub mountpoints: Vec<String>,
75+
}
76+
7077
impl Default for MetricsConfig {
7178
fn default() -> Self {
7279
Self {
@@ -89,6 +96,10 @@ impl Default for MetricsConfig {
8996
mountpoints: vec![],
9097
},
9198
pressure: PressureConfig { enabled: true },
99+
disk_stats: DiskStatConfig {
100+
enabled: false,
101+
mountpoints: vec![],
102+
},
92103
}
93104
}
94105
}
@@ -205,6 +216,28 @@ impl UserConfig {
205216
.unwrap_or_default();
206217
ret.pressure = PressureConfig { enabled };
207218
}
219+
220+
if let Some(node) = children.get("disk_stats") {
221+
let enabled = node
222+
.get("enabled")
223+
.and_then(|el| el.as_bool())
224+
.unwrap_or_default();
225+
let mountpoints = node
226+
.children()
227+
.and_then(|el| el.get("mountpoints"))
228+
.map(|el| el.entries())
229+
.map(|it| {
230+
it.iter()
231+
.filter_map(|el| el.value().as_string())
232+
.map(ToOwned::to_owned)
233+
.collect::<Vec<_>>()
234+
})
235+
.unwrap_or_default();
236+
ret.disk_stats = DiskStatConfig {
237+
enabled,
238+
mountpoints,
239+
};
240+
}
208241
}
209242

210243
ret

src/metrics/collector.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use prometheus_client::metrics::gauge::Gauge;
1111
use smol::lock::Mutex;
1212

1313
use super::cpu::{CpuUsage, LoadAverages};
14+
use super::disk::IOMetrics;
1415
use super::fs::FilesystemUsage;
1516
use super::info::NodeInfo;
1617
use super::memory::MemoryStats;
@@ -493,3 +494,80 @@ impl Metric for PressureCollector {
493494
})
494495
}
495496
}
497+
498+
/// Collector exporting per-disk read/written byte totals for a configured set of mountpoints.
#[derive(Debug)]
499+
pub struct DiskStatsCollector {
500+
/// Gauge family registered as `litemon_disk_bytes_written_total`.
bytes_written: Family<DiskStatsLabels, Gauge<u64, AtomicU64>>,
501+
/// Gauge family registered as `litemon_disk_bytes_read_total`.
bytes_read: Family<DiskStatsLabels, Gauge<u64, AtomicU64>>,
502+
/// Only disks whose mountpoint equals one of these entries are exported.
mountpoints: Vec<String>,
503+
}
504+
505+
/// Label set identifying one time series of the disk byte gauges.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
506+
struct DiskStatsLabels {
507+
/// Device name, taken from the key of `IOMetrics::disks`.
508+
device: String,
509+
/// Mountpoint reported for that device.
510+
mountpoint: String,
511+
}
512+
513+
impl DiskStatsCollector {
514+
pub fn new(options: hashbrown::HashMap<String, String>) -> Result<Self> {
515+
let mountpoints = if let Some(mountpoints_str) = options.get("mountpoints") {
516+
mountpoints_str
517+
.split(',')
518+
.map(|s| s.trim().to_string())
519+
.collect()
520+
} else {
521+
vec!["/".to_string()] // Default to root filesystem
522+
};
523+
524+
Ok(Self {
525+
bytes_written: Default::default(),
526+
bytes_read: Default::default(),
527+
mountpoints,
528+
})
529+
}
530+
}
531+
532+
impl Metric for DiskStatsCollector {
533+
fn register(&self, registry: &mut prometheus_client::registry::Registry) {
534+
registry.register(
535+
"litemon_disk_bytes_written_total",
536+
"Number of bytes written to disk since boot",
537+
self.bytes_written.clone(),
538+
);
539+
registry.register(
540+
"litemon_disk_bytes_read_total",
541+
"Number of bytes read from disk since boot",
542+
self.bytes_read.clone(),
543+
);
544+
}
545+
546+
fn collect(&self) -> DynFuture<'_, Result<()>> {
547+
Box::pin(async move {
548+
let stats = IOMetrics::all().await?;
549+
for (device, stats) in &stats.disks {
550+
if !self.mountpoints.iter().any(|el| el == &stats.mountpoint) {
551+
continue;
552+
}
553+
554+
let labels = DiskStatsLabels {
555+
device: device.clone(),
556+
mountpoint: stats.mountpoint.clone(),
557+
};
558+
559+
let bytes_read_prev = self.bytes_read.get_or_create(&labels).get();
560+
self.bytes_read
561+
.get_or_create(&labels)
562+
.inc_by(stats.bytes_read_total - bytes_read_prev);
563+
564+
let bytes_written_prev = self.bytes_written.get_or_create(&labels).get();
565+
self.bytes_written
566+
.get_or_create(&labels)
567+
.inc_by(stats.bytes_written_total - bytes_written_prev);
568+
}
569+
570+
Ok(())
571+
})
572+
}
573+
}

src/metrics/disk.rs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
//! Metrics about bytes read/written to disks.
2+
3+
use anyhow::{Context, Result};
4+
use hashbrown::HashMap;
5+
6+
/// Metrics about the bytes read/written to each disk found by [`IOMetrics::all`].
7+
#[derive(Debug)]
8+
pub struct IOMetrics {
9+
/// One entry per disk, keyed by the device name as listed in `/proc/diskstats`.
10+
pub disks: HashMap<String, DiskMetrics>,
11+
}
12+
13+
/// Metrics about a single disk.
14+
#[derive(Debug)]
15+
pub struct DiskMetrics {
16+
/// Bytes read since boot, derived from the device's sectors-read counter.
17+
pub bytes_read_total: u64,
18+
/// Bytes written since boot, derived from the device's sectors-written counter.
19+
pub bytes_written_total: u64,
20+
/// Path the device is mounted at, as listed in `/proc/mounts`.
21+
pub mountpoint: String,
22+
}
23+
24+
impl IOMetrics {
25+
/// Retrieve disk metrics for all attached disks.
26+
pub async fn all() -> Result<Self> {
27+
let stats =
28+
smol::unblock(|| procfs::diskstats().context("reading /proc/diskstats")).await?;
29+
// We try to only collect statistics about disks which are mounted.
30+
let mounts = smol::unblock(|| procfs::mounts().context("reading /proc/mounts")).await?;
31+
32+
let mut ret = IOMetrics {
33+
disks: HashMap::new(),
34+
};
35+
36+
for mount in &mounts {
37+
let device = &mount.fs_spec;
38+
let mount_point = &mount.fs_file;
39+
40+
// Skip non-physical mounts.
41+
let Some(device_name) = device.strip_prefix("/dev/") else {
42+
continue;
43+
};
44+
45+
// Skip pseudo filesystems
46+
if mount_point.starts_with("/proc")
47+
|| mount_point.starts_with("/sys")
48+
|| mount_point.starts_with("/dev")
49+
|| mount_point.starts_with("/run")
50+
{
51+
continue;
52+
}
53+
54+
let Some(stat) = stats.iter().find(|el| el.name == device_name) else {
55+
continue;
56+
};
57+
58+
let Some(root_device) = stats.iter().find(|el| device_name.starts_with(&el.name))
59+
else {
60+
continue;
61+
};
62+
63+
let sector_size_str =
64+
smol::fs::read_to_string(format!("/sys/block/{}/queue/hw_sector_size", root_device.name))
65+
.await
66+
.context("reading sector size")?;
67+
let sector_size = sector_size_str
68+
.trim()
69+
.parse::<u64>()
70+
.context("parsing sector size")?;
71+
let metrics = DiskMetrics {
72+
bytes_read_total: stat.sectors_read * sector_size,
73+
bytes_written_total: stat.sectors_written * sector_size,
74+
mountpoint: mount_point.clone(),
75+
};
76+
77+
ret.disks.insert(stat.name.clone(), metrics);
78+
}
79+
80+
Ok(ret)
81+
}
82+
}

src/metrics/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use anyhow::Result;
44

55
pub mod collector;
66
pub mod cpu;
7+
pub mod disk;
78
pub mod fs;
89
pub mod info;
910
pub mod memory;

0 commit comments

Comments
 (0)