Skip to content

Commit fd137bf

Browse files
authored
feat(new_metrics): show disk-level resource usage by shell nodes command based on new metrics (apache#1889)
1 parent 41cba64 commit fd137bf

File tree

5 files changed

+61
-32
lines changed

5 files changed

+61
-32
lines changed

src/common/fs_manager.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828

2929
#include <fmt/std.h> // IWYU pragma: keep
3030
#include <algorithm>
31-
#include <cmath>
3231
#include <cstdint>
3332
#include <utility>
3433

@@ -41,6 +40,7 @@
4140
#include "utils/fail_point.h"
4241
#include "utils/filesystem.h"
4342
#include "utils/fmt_logging.h"
43+
#include "utils/math.h"
4444
#include "utils/ports.h"
4545

4646
METRIC_DEFINE_entity(disk);
@@ -171,17 +171,16 @@ void dir_node::update_disk_stat()
171171

172172
disk_capacity_mb = dsi.capacity >> 20;
173173
disk_available_mb = dsi.available >> 20;
174-
disk_available_ratio = static_cast<int>(
175-
disk_capacity_mb == 0 ? 0 : std::round(disk_available_mb * 100.0 / disk_capacity_mb));
174+
disk_available_ratio = dsn::utils::calc_percentage<int>(disk_available_mb, disk_capacity_mb);
176175

177176
METRIC_SET(disk_capacity, disk_capacity_total_mb, disk_capacity_mb);
178177
METRIC_SET(disk_capacity, disk_capacity_avail_mb, disk_available_mb);
179178

180179
// It's able to change status from NORMAL to SPACE_INSUFFICIENT, and vice versa.
181-
disk_status::type old_status = status;
182-
auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
183-
? disk_status::SPACE_INSUFFICIENT
184-
: disk_status::NORMAL;
180+
const disk_status::type old_status = status;
181+
const auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
182+
? disk_status::SPACE_INSUFFICIENT
183+
: disk_status::NORMAL;
185184
if (old_status != new_status) {
186185
status = new_status;
187186
}
@@ -389,8 +388,7 @@ void fs_manager::update_disk_stat()
389388
min_available_ratio = std::min(dn->disk_available_ratio, min_available_ratio);
390389
max_available_ratio = std::max(dn->disk_available_ratio, max_available_ratio);
391390
}
392-
total_available_ratio = static_cast<int>(
393-
total_capacity_mb == 0 ? 0 : std::round(total_available_mb * 100.0 / total_capacity_mb));
391+
total_available_ratio = dsn::utils::calc_percentage<int>(total_available_mb, total_capacity_mb);
394392

395393
LOG_INFO("update disk space succeed: disk_count = {}, total_capacity_mb = {}, "
396394
"total_available_mb = {}, total_available_ratio = {}%, min_available_ratio = {}%, "

src/replica/test/replica_disk_test_base.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <gtest/gtest.h>
2121
#include "utils/fail_point.h"
2222
#include "utils/fmt_logging.h"
23+
#include "utils/math.h"
2324

2425
#include "replica/test/replica_test_base.h"
2526

@@ -159,12 +160,12 @@ class replica_disk_test_base : public replica_test_base
159160

160161
void generate_mock_dir_nodes(int num)
161162
{
162-
int64_t disk_capacity_mb = num * 100;
163+
const int64_t disk_capacity_mb = num * 100;
163164
int count = 0;
164165
while (count++ < num) {
165-
int64_t disk_available_mb = count * 50;
166-
int disk_available_ratio =
167-
static_cast<int>(std::round((double)100 * disk_available_mb / disk_capacity_mb));
166+
const int64_t disk_available_mb = count * 50;
167+
const auto disk_available_ratio =
168+
dsn::utils::calc_percentage<int>(disk_available_mb, disk_capacity_mb);
168169
// create one mock dir_node and make sure disk_capacity_mb_ > disk_available_mb_
169170
dir_node *node_disk = new dir_node("tag_" + std::to_string(count),
170171
"./tag_" + std::to_string(count),

src/shell/commands/node_management.cpp

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "utils/blob.h"
4848
#include "utils/error_code.h"
4949
#include "utils/errors.h"
50+
#include "utils/math.h"
5051
#include "utils/metrics.h"
5152
#include "utils/output_utils.h"
5253
#include "utils/ports.h"
@@ -98,11 +99,13 @@ dsn::metric_filters resource_usage_filters()
9899
{
99100
dsn::metric_filters filters;
100101
filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField};
101-
filters.entity_types = {"server", "replica"};
102+
filters.entity_types = {"server", "replica", "disk"};
102103
filters.entity_metrics = {"resident_mem_usage_mb",
103104
"rdb_block_cache_mem_usage_bytes",
104105
"rdb_memtable_mem_usage_bytes",
105-
"rdb_index_and_filter_blocks_mem_usage_bytes"};
106+
"rdb_index_and_filter_blocks_mem_usage_bytes",
107+
"disk_capacity_total_mb",
108+
"disk_capacity_avail_mb"};
106109
return filters;
107110
}
108111

@@ -117,24 +120,48 @@ dsn::error_s parse_resource_usage(const std::string &json_string, list_nodes_hel
117120
return FMT_ERR(dsn::ERR_INVALID_DATA, "invalid json string");
118121
}
119122

123+
int64_t total_capacity_mb = 0;
124+
int64_t total_available_mb = 0;
125+
stat.disk_available_min_ratio = 100;
120126
for (const auto &entity : query_snapshot.entities) {
121-
for (const auto &m : entity.metrics) {
122-
if (entity.type == "server") {
127+
if (entity.type == "server") {
128+
for (const auto &m : entity.metrics) {
123129
if (m.name == "resident_mem_usage_mb") {
124130
stat.memused_res_mb += m.value;
125131
} else if (m.name == "rdb_block_cache_mem_usage_bytes") {
126132
stat.block_cache_bytes += m.value;
127133
}
128-
} else if (entity.type == "replica") {
134+
}
135+
} else if (entity.type == "replica") {
136+
for (const auto &m : entity.metrics) {
129137
if (m.name == "rdb_memtable_mem_usage_bytes") {
130138
stat.mem_tbl_bytes += m.value;
131139
} else if (m.name == "rdb_index_and_filter_blocks_mem_usage_bytes") {
132140
stat.mem_idx_bytes += m.value;
133141
}
134142
}
143+
} else if (entity.type == "disk") {
144+
int64_t capacity_mb = 0;
145+
int64_t available_mb = 0;
146+
for (const auto &m : entity.metrics) {
147+
if (m.name == "disk_capacity_total_mb") {
148+
total_capacity_mb += m.value;
149+
capacity_mb = m.value;
150+
} else if (m.name == "disk_capacity_avail_mb") {
151+
total_available_mb += m.value;
152+
available_mb = m.value;
153+
}
154+
}
155+
156+
const auto available_ratio = dsn::utils::calc_percentage(available_mb, capacity_mb);
157+
stat.disk_available_min_ratio =
158+
std::min(stat.disk_available_min_ratio, available_ratio);
135159
}
136160
}
137161

162+
stat.disk_available_total_ratio =
163+
dsn::utils::calc_percentage(total_available_mb, total_capacity_mb);
164+
138165
return dsn::error_s::ok();
139166
}
140167

@@ -281,10 +308,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
281308

282309
const auto &results = get_metrics(nodes, resource_usage_filters().to_query_string());
283310

284-
// TODO(wangdan): following replica-level and disk-level metrics would be replaced:
285-
// "replica*eon.replica_stub*disk.available.total.ratio"
286-
// "replica*eon.replica_stub*disk.available.min.ratio"
287-
288311
for (size_t i = 0; i < nodes.size(); ++i) {
289312
auto tmp_it = tmp_map.find(nodes[i].address);
290313
if (tmp_it == tmp_map.end()) {
@@ -312,15 +335,6 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args)
312335
<< " failed: " << res << std::endl;
313336
return true;
314337
}
315-
316-
// TODO(wangdan): after migrated to new metrics, remove following code:
317-
dsn::perf_counter_info info;
318-
for (dsn::perf_counter_metric &m : info.counters) {
319-
if (m.name.find("disk.available.total.ratio") != std::string::npos)
320-
stat.disk_available_total_ratio += m.value;
321-
else if (m.name.find("disk.available.min.ratio") != std::string::npos)
322-
stat.disk_available_min_ratio += m.value;
323-
}
324338
}
325339
}
326340

src/utils/math.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#include "math.h"
1919

20-
#include <math.h>
2120
#include <algorithm>
2221
#include <numeric>
2322

src/utils/math.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,29 @@
1818
#pragma once
1919

2020
#include <vector>
21+
#include <cmath>
2122
#include <cstdint>
23+
#include <type_traits>
24+
25+
#include "utils/ports.h"
2226

2327
namespace dsn {
2428
namespace utils {
2529

2630
double mean_stddev(const std::vector<uint32_t> &result_set, bool partial_sample);
2731

32+
template <typename TOutput = int64_t,
33+
typename TInput = int64_t,
34+
typename = typename std::enable_if<std::is_arithmetic<TOutput>::value>::type,
35+
typename = typename std::enable_if<std::is_arithmetic<TInput>::value>::type>
36+
TOutput calc_percentage(TInput numerator, TInput denominator)
37+
{
38+
if (dsn_unlikely(denominator == 0)) {
39+
return static_cast<TOutput>(0);
40+
}
41+
42+
return static_cast<TOutput>(std::round(numerator * 100.0 / denominator));
43+
}
44+
2845
} // namespace utils
2946
} // namespace dsn

0 commit comments

Comments
 (0)