Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 99 additions & 1 deletion metrics-exporter/src/bin/io_engine/client/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ pub(crate) struct PoolInfo {
committed: u64,
disk_capacity: u64,
max_expandable_size: u64,
io_error_count: u64,
io_error_threshold: u64,
io_stalled: bool,
io_stall_transition_count: u64,
io_stall_transition_threshold: u64,
alert_status: i32,
notice: Vec<i32>,
attention: Vec<i32>,
warning: Vec<i32>,
critical: Vec<i32>,
}

impl PoolInfo {
Expand Down Expand Up @@ -43,10 +53,60 @@ impl PoolInfo {
self.max_expandable_size
}

/// Get state of the Pool.
///
/// Returns the stored `state` field unchanged, as a raw `u64`.
pub(crate) fn state(&self) -> u64 {
self.state
}

/// Get the count of IO errors on the pool.
///
/// The value is taken from `errors.io_error_count` of the RPC pool message and is `0` when the
/// message carries no `errors` section.
pub(crate) fn io_error_count(&self) -> u64 {
self.io_error_count
}

/// Get the IO error threshold for the pool.
///
/// The value is taken from `errors.io_error_threshold` of the RPC pool message and is `0` when
/// the message carries no `errors` section.
pub(crate) fn io_error_threshold(&self) -> u64 {
self.io_error_threshold
}

/// Get whether the pool is currently in stalled state.
///
/// The value is taken from `errors.io_stalled` of the RPC pool message and is `false` when the
/// message carries no `errors` section.
pub(crate) fn io_stalled(&self) -> bool {
self.io_stalled
}

/// Get the count of IO stall transitions on the pool.
///
/// The value is taken from `errors.io_stall_transition_count` of the RPC pool message and is `0`
/// when the message carries no `errors` section.
pub(crate) fn io_stall_transition_count(&self) -> u64 {
self.io_stall_transition_count
}

/// Get the IO stall transition threshold for the pool.
///
/// The value is taken from `errors.io_stall_transition_threshold` of the RPC pool message and is
/// `0` when the message carries no `errors` section.
pub(crate) fn io_stall_transition_threshold(&self) -> u64 {
self.io_stall_transition_threshold
}

/// Get the alert status for the pool.
pub(crate) fn alert_status(&self) -> i32 {
self.alert_status
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the enum value?
In the node exporter, online is being created as a gauge, would it be better to do the same here, and create gauges for each status? (I don't know how these are typically exported...)

}

/// Get the collection of notice alert reasons for the pool.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this mean? What is this number: the enum value, the count, ...?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought of concatenating all the i32 values into one f64 value. An example is pasted below: the attention reasons contain
both IoError and IoStallIntermittent.

./io-engine-client pool list -ojson
[
{
"capacity": 1069547520,
"cluster_size": 4194304,
"committed": 0,
"disk_capacity": 1073741824,
"disk_info": [
{
"errors": {
"alerts": {
"attention": [
4,
2
],
"critical": [
1
],
"notice": [],
"status": 3,
"warning": []
},
"io_error_count": 3,
"io_error_threshold": 8,
"io_stall_transition_count": 2,
"io_stall_transition_threshold": 3,
"io_stalled": true
},
"uri": "uring:///dev/mapper/lvmvg-newlv"
}
],
"disks": [
"uring:///dev/mapper/lvmvg-newlv"
],
"encrypted": false,
"errors": {
"alerts": {
"attention": [
4,
2
],
"critical": [
1
],
"notice": [],
"status": 3,
"warning": []
},
"io_error_count": 3,
"io_error_threshold": 8,
"io_stall_transition_count": 2,
"io_stall_transition_threshold": 3,
"io_stalled": true
},
"max_expandable_size": 137271181312,
"md_info": {
"md_page_size": 4096,
"md_pages": 256,
"md_used_pages": 1
},
"name": "pool-uring",
"page_size": 4096,
"pooltype": 0,
"state": 4,
"used": 0,
"uuid": "03de92fb-1f6b-4121-ba3b-ee31934b9abc"
}
]
/bin # %

image

After discussing with @krishnaGajabi, I'm thinking of exposing whether each variant is present or not using labels. I'll see how that works.

pub(crate) fn notice(&self) -> &Vec<i32> {
&self.notice
}

/// Get the collection of attention alert reasons for the pool.
///
/// The list is taken from `errors.alerts.attention` of the RPC pool message and is empty when
/// the message carries no `errors` or no `alerts` section. Elements are the raw `i32` codes as
/// received (presumably alert-reason enum values; confirm against the RPC definition).
pub(crate) fn attention(&self) -> &Vec<i32> {
&self.attention
}

/// Get the collection of warning alert reasons for the pool.
///
/// The list is taken from `errors.alerts.warning` of the RPC pool message and is empty when the
/// message carries no `errors` or no `alerts` section. Elements are the raw `i32` codes as
/// received (presumably alert-reason enum values; confirm against the RPC definition).
pub(crate) fn warning(&self) -> &Vec<i32> {
&self.warning
}

/// Get the collection of critical alert reasons for the pool.
///
/// The list is taken from `errors.alerts.critical` of the RPC pool message and is empty when the
/// message carries no `errors` or no `alerts` section. Elements are the raw `i32` codes as
/// received (presumably alert-reason enum values; confirm against the RPC definition).
pub(crate) fn critical(&self) -> &Vec<i32> {
&self.critical
}
}

/// Array of PoolInfo objects.
Expand All @@ -57,6 +117,34 @@ pub(crate) struct Pools {

impl From<rpc::v1::pool::Pool> for PoolInfo {
fn from(value: rpc::v1::pool::Pool) -> Self {
let mut io_error_count: u64 = 0;
let mut io_error_threshold: u64 = 0;
let mut io_stalled: bool = false;
let mut io_stall_transition_count: u64 = 0;
let mut io_stall_transition_threshold: u64 = 0;
let mut alert_status: i32 = 0;
let mut notice: Vec<i32> = Vec::new();
let mut attention: Vec<i32> = Vec::new();
let mut warning: Vec<i32> = Vec::new();
let mut critical: Vec<i32> = Vec::new();
if let Some(errors) = value.errors.clone() {
io_error_count = errors.io_error_count;
io_error_threshold = errors.io_error_threshold;
io_stalled = errors.io_stalled;
io_stall_transition_count = errors.io_stall_transition_count;
io_stall_transition_threshold = errors.io_stall_transition_threshold;
if let Some(alerts) = errors.alerts {
alert_status = alerts.status;

notice = alerts.notice;

attention = alerts.attention;

warning = alerts.warning;

critical = alerts.critical;
}
}
Self {
name: value.name,
used: value.used,
Expand All @@ -65,6 +153,16 @@ impl From<rpc::v1::pool::Pool> for PoolInfo {
committed: value.committed,
disk_capacity: value.disk_capacity,
max_expandable_size: value.max_expandable_size.unwrap_or_default(),
io_error_count,
io_error_threshold,
io_stalled,
io_stall_transition_count,
io_stall_transition_threshold,
alert_status,
notice,
attention,
warning,
critical,
}
}
}
37 changes: 37 additions & 0 deletions metrics-exporter/src/bin/io_engine/collector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,43 @@ fn init_diskpool_gauge_vec(
gauge_vec
}

/// Initializes a GaugeVec metric for diskpool alert reason with the provided metric name, description and
/// descriptors.
fn init_diskpool_alert_reason_gauge_vec(
metric_name: &str,
metric_desc: &str,
descs: &mut Vec<Desc>,
) -> GaugeVec {
let opts = Opts::new(metric_name, metric_desc)
.subsystem("diskpool_alert")
.variable_labels(vec![
"node".to_string(),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's place these in a vec and reuse it here and below to avoid discrepancies

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

node is part of Collector struct now. Is that what you meant?

"name".to_string(),
"unknown".to_string(),
"io_stalled".to_string(),
"io_stall_intermittent".to_string(),
"io_stall_intermittent_exc".to_string(),
"io_error".to_string(),
"io_error_exc".to_string(),
]);
let gauge_vec = GaugeVec::new(
opts,
&[
"node",
"name",
"unknown",
"io_stalled",
"io_stall_intermittent",
"io_stall_intermittent_exc",
"io_error",
"io_error_exc",
],
)
.unwrap_or_else(|_| panic!("Unable to create gauge metric type for {metric_name}"));
descs.extend(gauge_vec.desc().into_iter().cloned());
gauge_vec
}

/// Initializes a GaugeVec metric for volume with the provided metric name, description and
/// descriptors.
fn init_volume_gauge_vec(metric_name: &str, metric_desc: &str, descs: &mut Vec<Desc>) -> GaugeVec {
Expand Down
Loading
Loading