-
Notifications
You must be signed in to change notification settings - Fork 44
adding pool alert metrics #833
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,16 @@ pub(crate) struct PoolInfo { | |
| committed: u64, | ||
| disk_capacity: u64, | ||
| max_expandable_size: u64, | ||
| io_error_count: u64, | ||
| io_error_threshold: u64, | ||
| io_stalled: bool, | ||
| io_stall_transition_count: u64, | ||
| io_stall_transition_threshold: u64, | ||
| alert_status: i32, | ||
| notice: Vec<i32>, | ||
| attention: Vec<i32>, | ||
| warning: Vec<i32>, | ||
| critical: Vec<i32>, | ||
| } | ||
|
|
||
| impl PoolInfo { | ||
|
|
@@ -43,10 +53,60 @@ impl PoolInfo { | |
| self.max_expandable_size | ||
| } | ||
|
|
||
| /// Get pool of the io_engine. | ||
| /// Get state of the Pool. | ||
| pub(crate) fn state(&self) -> u64 { | ||
| self.state | ||
| } | ||
|
|
||
| /// Get the count of IO errors on the pool. | ||
| pub(crate) fn io_error_count(&self) -> u64 { | ||
| self.io_error_count | ||
| } | ||
|
|
||
| /// Get the IO error threshold for the pool. | ||
| pub(crate) fn io_error_threshold(&self) -> u64 { | ||
| self.io_error_threshold | ||
| } | ||
|
|
||
| /// Get whether the pool is currently in stalled state. | ||
| pub(crate) fn io_stalled(&self) -> bool { | ||
| self.io_stalled | ||
| } | ||
|
|
||
| /// Get the count of IO stall transitions on the pool. | ||
| pub(crate) fn io_stall_transition_count(&self) -> u64 { | ||
| self.io_stall_transition_count | ||
| } | ||
|
|
||
| /// Get the IO stall transition threshold for the pool. | ||
| pub(crate) fn io_stall_transition_threshold(&self) -> u64 { | ||
| self.io_stall_transition_threshold | ||
| } | ||
|
|
||
| /// Get the alert status for the pool. | ||
| pub(crate) fn alert_status(&self) -> i32 { | ||
| self.alert_status | ||
| } | ||
|
|
||
| /// Get the collection of notice alert reasons for the pool. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does this mean? What is this number: the enum value, or the count?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought of concatenating all the i32 values into one f64 value. Example pasted below: attention reason has ./io-engine-client pool list -ojson
After discussing with @krishnaGajabi, I'm thinking of using labels to expose whether each variant is present or not. I'll see how that works. |
||
| pub(crate) fn notice(&self) -> &Vec<i32> { | ||
| &self.notice | ||
| } | ||
|
|
||
| /// Get the collection of attention alert reasons for the pool. | ||
| pub(crate) fn attention(&self) -> &Vec<i32> { | ||
| &self.attention | ||
| } | ||
|
|
||
| /// Get the collection of warning alert reasons for the pool. | ||
| pub(crate) fn warning(&self) -> &Vec<i32> { | ||
| &self.warning | ||
| } | ||
|
|
||
| /// Get the collection of critical alert reasons for the pool. | ||
| pub(crate) fn critical(&self) -> &Vec<i32> { | ||
| &self.critical | ||
| } | ||
| } | ||
|
|
||
| /// Array of PoolInfo objects. | ||
|
|
@@ -57,6 +117,34 @@ pub(crate) struct Pools { | |
|
|
||
| impl From<rpc::v1::pool::Pool> for PoolInfo { | ||
| fn from(value: rpc::v1::pool::Pool) -> Self { | ||
| let mut io_error_count: u64 = 0; | ||
| let mut io_error_threshold: u64 = 0; | ||
| let mut io_stalled: bool = false; | ||
| let mut io_stall_transition_count: u64 = 0; | ||
| let mut io_stall_transition_threshold: u64 = 0; | ||
| let mut alert_status: i32 = 0; | ||
| let mut notice: Vec<i32> = Vec::new(); | ||
| let mut attention: Vec<i32> = Vec::new(); | ||
| let mut warning: Vec<i32> = Vec::new(); | ||
| let mut critical: Vec<i32> = Vec::new(); | ||
| if let Some(errors) = value.errors.clone() { | ||
| io_error_count = errors.io_error_count; | ||
| io_error_threshold = errors.io_error_threshold; | ||
| io_stalled = errors.io_stalled; | ||
| io_stall_transition_count = errors.io_stall_transition_count; | ||
| io_stall_transition_threshold = errors.io_stall_transition_threshold; | ||
| if let Some(alerts) = errors.alerts { | ||
| alert_status = alerts.status; | ||
|
|
||
| notice = alerts.notice; | ||
|
|
||
| attention = alerts.attention; | ||
|
|
||
| warning = alerts.warning; | ||
|
|
||
| critical = alerts.critical; | ||
| } | ||
| } | ||
| Self { | ||
| name: value.name, | ||
| used: value.used, | ||
|
|
@@ -65,6 +153,16 @@ impl From<rpc::v1::pool::Pool> for PoolInfo { | |
| committed: value.committed, | ||
| disk_capacity: value.disk_capacity, | ||
| max_expandable_size: value.max_expandable_size.unwrap_or_default(), | ||
| io_error_count, | ||
| io_error_threshold, | ||
| io_stalled, | ||
| io_stall_transition_count, | ||
| io_stall_transition_threshold, | ||
| alert_status, | ||
| notice, | ||
| attention, | ||
| warning, | ||
| critical, | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,43 @@ fn init_diskpool_gauge_vec( | |
| gauge_vec | ||
| } | ||
|
|
||
| /// Initializes a GaugeVec metric for diskpool alert reason with the provided metric name, description and | ||
| /// descriptors. | ||
| fn init_diskpool_alert_reason_gauge_vec( | ||
| metric_name: &str, | ||
| metric_desc: &str, | ||
| descs: &mut Vec<Desc>, | ||
| ) -> GaugeVec { | ||
| let opts = Opts::new(metric_name, metric_desc) | ||
| .subsystem("diskpool_alert") | ||
| .variable_labels(vec![ | ||
| "node".to_string(), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's place these in a vec and reuse it here and below to avoid discrepancies.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `node` is part of the `Collector` struct now. Is that what you meant? |
||
| "name".to_string(), | ||
| "unknown".to_string(), | ||
| "io_stalled".to_string(), | ||
| "io_stall_intermittent".to_string(), | ||
| "io_stall_intermittent_exc".to_string(), | ||
| "io_error".to_string(), | ||
| "io_error_exc".to_string(), | ||
| ]); | ||
| let gauge_vec = GaugeVec::new( | ||
| opts, | ||
| &[ | ||
| "node", | ||
| "name", | ||
| "unknown", | ||
| "io_stalled", | ||
| "io_stall_intermittent", | ||
| "io_stall_intermittent_exc", | ||
| "io_error", | ||
| "io_error_exc", | ||
| ], | ||
| ) | ||
| .unwrap_or_else(|_| panic!("Unable to create gauge metric type for {metric_name}")); | ||
| descs.extend(gauge_vec.desc().into_iter().cloned()); | ||
| gauge_vec | ||
| } | ||
|
|
||
| /// Initializes a GaugeVec metric for volume with the provided metric name, description and | ||
| /// descriptors. | ||
| fn init_volume_gauge_vec(metric_name: &str, metric_desc: &str, descs: &mut Vec<Desc>) -> GaugeVec { | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this the enum value?
In the node exporter, `online` is created as a gauge; would it be better to do the same here and create a gauge for each status? (I don't know how these are typically exported...)