Skip to content

Commit 9433514

Browse files
authored
disagg: Add O11y on object store usage summary of each tiflash store (#10764)
ref #10763. disagg: add a configurable, owner-only S3 storage summary and per-store usage metrics. Signed-off-by: JaySon-Huang <tshent@qq.com>
1 parent 74a92f5 commit 9433514

File tree

7 files changed

+597
-38
lines changed

7 files changed

+597
-38
lines changed

dbms/src/Common/TiFlashMetrics.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ TiFlashMetrics::TiFlashMetrics()
7373
.Name("tiflash_storage_ru_read_bytes")
7474
.Help("Read bytes for storage RU calculation")
7575
.Register(*registry);
76+
77+
registered_s3_store_summary_bytes_family = &prometheus::BuildGauge()
78+
.Name("tiflash_storage_s3_store_summary_bytes")
79+
.Help("S3 storage summary bytes by store and file type")
80+
.Register(*registry);
7681
}
7782

7883
void TiFlashMetrics::addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru)
@@ -249,4 +254,37 @@ prometheus::Counter & TiFlashMetrics::getStorageRUReadBytesCounter(
249254
return counter;
250255
}
251256
}
257+
258+
// Publish the S3 usage summary of one store as gauges.
//
// Two gauges of `registered_s3_store_summary_bytes_family` are set, labelled
//   {store_id=<store_id>, type="data_file_bytes"} and
//   {store_id=<store_id>, type="dt_file_bytes"}.
// The gauges of a store are created on the first call for that store and cached in
// `registered_s3_store_summary_bytes_metrics`; later calls only look them up.
//
// store_id        - the store whose summary is being published
// data_file_bytes - value stored into the "data_file_bytes" gauge
// dt_file_bytes   - value stored into the "dt_file_bytes" gauge
void TiFlashMetrics::setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_bytes, UInt64 dt_file_bytes)
{
    // Fast path: the gauges of this store are already cached, so a shared lock
    // on the cache is enough to find them.
    {
        std::shared_lock lock(s3_store_summary_bytes_mtx);
        if (auto it = registered_s3_store_summary_bytes_metrics.find(store_id);
            it != registered_s3_store_summary_bytes_metrics.end())
        {
            it->second.data_file_bytes->Set(data_file_bytes);
            it->second.dt_file_bytes->Set(dt_file_bytes);
            return;
        }
    }

    // Slow path: take the exclusive lock and look up again, because another
    // thread may have registered this store between the two locks.
    std::unique_lock lock(s3_store_summary_bytes_mtx);
    auto it = registered_s3_store_summary_bytes_metrics.find(store_id);
    if (it == registered_s3_store_summary_bytes_metrics.end())
    {
        // Create both gauges BEFORE inserting the cache entry. If `Add` throws,
        // the cache is left untouched; inserting an empty entry first would leave
        // unset gauge pointers behind for the fast path to dereference later.
        const auto store_id_str = std::to_string(store_id);
        auto & data_file_bytes_metric
            = registered_s3_store_summary_bytes_family->Add({{"store_id", store_id_str}, {"type", "data_file_bytes"}});
        auto & dt_file_bytes_metric
            = registered_s3_store_summary_bytes_family->Add({{"store_id", store_id_str}, {"type", "dt_file_bytes"}});
        it = registered_s3_store_summary_bytes_metrics
                 .emplace(
                     store_id,
                     S3StoreSummaryBytesMetrics{
                         .data_file_bytes = &data_file_bytes_metric,
                         .dt_file_bytes = &dt_file_bytes_metric,
                     })
                 .first;
    }

    it->second.data_file_bytes->Set(data_file_bytes);
    it->second.dt_file_bytes->Set(dt_file_bytes);
}
252290
} // namespace DB

dbms/src/Common/TiFlashMetrics.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,6 +1335,8 @@ class TiFlashMetrics
13351335
const String & resource_group,
13361336
const DM::ReadRUType type);
13371337

1338+
void setS3StoreSummaryBytes(UInt64 store_id, UInt64 data_file_bytes, UInt64 dt_file_bytes);
1339+
13381340
private:
13391341
TiFlashMetrics();
13401342

@@ -1376,6 +1378,15 @@ class TiFlashMetrics
13761378
// {keyspace}_{resource_group}_{type} -> Counter
13771379
std::unordered_map<std::string, prometheus::Counter *> registered_storage_ru_read_bytes_metrics;
13781380

1381+
struct S3StoreSummaryBytesMetrics
1382+
{
1383+
prometheus::Gauge * data_file_bytes;
1384+
prometheus::Gauge * dt_file_bytes;
1385+
};
1386+
prometheus::Family<prometheus::Gauge> * registered_s3_store_summary_bytes_family;
1387+
std::shared_mutex s3_store_summary_bytes_mtx;
1388+
std::unordered_map<UInt64, S3StoreSummaryBytesMetrics> registered_s3_store_summary_bytes_metrics;
1389+
13791390
public:
13801391
#define MAKE_METRIC_MEMBER_M(family_name, help, type, ...) \
13811392
MetricFamily<prometheus::type> family_name \

dbms/src/Interpreters/Settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ struct Settings
240240
M(SettingBool, remote_checkpoint_only_upload_manifest, true, "Only upload manifest data when uploading checkpoint") \
241241
M(SettingInt64, remote_gc_method, 1, "The method of running GC task on the remote store. 1 - lifecycle, 2 - scan.") \
242242
M(SettingInt64, remote_gc_interval_seconds, 3600, "The interval of running GC task on the remote store. Unit is second.") \
243+
M(SettingInt64, remote_summary_interval_seconds, 0, "The interval of collecting remote S3 storage summary. Unit is second. <=0 disables periodic summary task.") \
243244
M(SettingInt64, remote_gc_verify_consistency, 0, "[testing] Verify the consistenct of valid locks when doing GC") \
244245
M(SettingInt64, remote_gc_min_age_seconds, 3600, "The file will NOT be compacted when the time difference between the last modification is less than this threshold") \
245246
M(SettingDouble, remote_gc_ratio, 0.5, "The files with valid rate less than this threshold will be compacted") \

dbms/src/Storages/KVStore/TMTContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ void TMTContext::initS3GCManager(const TiFlashRaftProxyHelper * proxy_helper)
222222
}
223223
// TODO: make it reloadable
224224
remote_gc_config.interval_seconds = context.getSettingsRef().remote_gc_interval_seconds;
225+
remote_gc_config.summary_interval_seconds = context.getSettingsRef().remote_summary_interval_seconds;
225226
remote_gc_config.verify_locks = context.getSettingsRef().remote_gc_verify_consistency > 0;
226227
// set the gc_method so that S3LockService can set tagging when create delmark
227228
S3::ClientFactory::instance().gc_method = remote_gc_config.method;

dbms/src/Storages/S3/S3GCManager.cpp

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,11 @@ bool S3GCManager::runOnAllStores()
224224
return false;
225225
}
226226

227+
bool S3GCManager::isOwner() const
228+
{
229+
return gc_owner_manager->isOwner();
230+
}
231+
227232
void S3GCManager::runForStore(UInt64 gc_store_id, LoggerPtr slogger)
228233
{
229234
// get a timepoint at the begin, only remove objects that expired compare
@@ -858,6 +863,17 @@ S3StoreStorageSummary S3GCManager::getStoreStorageSummary(StoreID store_id)
858863
String last_dtfile_key;
859864
size_t num_dtfile_keys_for_last_dtfile = 0;
860865
S3::listPrefix(*client, prefix, [&](const Aws::S3::Model::Object & object) {
866+
if (shutdown_called)
867+
{
868+
LOG_INFO(
869+
log,
870+
"getS3StorageSummary shutting down, break, store_id={} processed_keys={}",
871+
store_id,
872+
num_processed_keys);
873+
// .more=false to break the listing early
874+
return PageResult{.num_keys = 1, .more = false};
875+
}
876+
861877
const auto & key = object.GetKey();
862878
const auto view = S3FilenameView::fromKey(key);
863879
if (watch.elapsedSeconds() - last_elapsed > log_interval_seconds)
@@ -931,6 +947,7 @@ S3StoreStorageSummary S3GCManager::getStoreStorageSummary(StoreID store_id)
931947
return PageResult{.num_keys = 1, .more = true};
932948
});
933949
summary.num_keys = num_processed_keys;
950+
TiFlashMetrics::instance().setS3StoreSummaryBytes(store_id, summary.data_file.bytes, summary.dt_file.bytes);
934951
LOG_INFO(log, "getS3StorageSummary finish, elapsed={:.3f}s summary={}", watch.elapsedSeconds(), summary);
935952
return summary;
936953
}
@@ -956,6 +973,49 @@ S3GCManagerService::S3GCManagerService(
956973
[this]() { return manager->runOnAllStores(); },
957974
false,
958975
/*interval_ms*/ config.interval_seconds * 1000);
976+
977+
if (config.summary_interval_seconds <= 0)
978+
{
979+
LOG_INFO(
980+
Logger::get("S3GCManagerService"),
981+
"The periodic S3 storage summary will be disabled, summary_interval_seconds={}",
982+
config.summary_interval_seconds);
983+
}
984+
else
985+
{
986+
if (config.summary_interval_seconds < 12 * 3600)
987+
{
988+
LOG_WARNING(
989+
Logger::get("S3GCManagerService"),
990+
"The summary_interval_seconds is too small, it may cause high overhead on S3. "
991+
"It is recommended to set it to a value larger than 12 hours (43200 seconds), "
992+
"summary_interval_seconds={}",
993+
config.summary_interval_seconds);
994+
}
995+
996+
summary_timer = global_ctx.getBackgroundPool().addTask(
997+
[this]() {
998+
// Only run summary in the owner instance
999+
if (!manager || !manager->isOwner())
1000+
return false;
1001+
1002+
try
1003+
{
1004+
auto summary = manager->getS3StorageSummary({});
1005+
LOG_INFO(
1006+
Logger::get("S3GCManagerService"),
1007+
"Periodic S3 storage summary finished, num_stores={}",
1008+
summary.stores.size());
1009+
}
1010+
catch (...)
1011+
{
1012+
tryLogCurrentException(Logger::get("S3GCManagerService"), "periodic getS3StorageSummary failed");
1013+
}
1014+
return false;
1015+
},
1016+
false,
1017+
config.summary_interval_seconds * 1000);
1018+
}
9591019
}
9601020

9611021
S3GCManagerService::~S3GCManagerService()
@@ -976,9 +1036,16 @@ void S3GCManagerService::shutdown()
9761036
// Remove the task handler. It will block until the task break
9771037
global_ctx.getBackgroundPool().removeTask(timer);
9781038
timer = nullptr;
979-
// then we can reset the manager
980-
manager = nullptr;
9811039
}
1040+
1041+
if (summary_timer)
1042+
{
1043+
global_ctx.getBackgroundPool().removeTask(summary_timer);
1044+
summary_timer = nullptr;
1045+
}
1046+
1047+
// then we can reset the manager
1048+
manager = nullptr;
9821049
}
9831050

9841051
void S3GCManagerService::wake() const

dbms/src/Storages/S3/S3GCManager.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ struct S3GCConfig
7272
// The interval of the S3 GC routine runs
7373
Int64 interval_seconds = 600;
7474

75+
// The interval of periodic S3 storage summary task.
76+
Int64 summary_interval_seconds = 24 * 60 * 60;
77+
7578
// The maximum number of manifest files preserve
7679
// for each store
7780
size_t manifest_preserve_count = 10;
@@ -144,6 +147,8 @@ class S3GCManager
144147

145148
bool runOnAllStores();
146149

150+
bool isOwner() const;
151+
147152
void shutdown() { shutdown_called = true; }
148153

149154
S3StoreStorageSummary getStoreStorageSummary(StoreID store_id);
@@ -234,6 +239,7 @@ class S3GCManagerService
234239
Context & global_ctx;
235240
std::unique_ptr<S3GCManager> manager;
236241
BackgroundProcessingPool::TaskHandle timer;
242+
BackgroundProcessingPool::TaskHandle summary_timer;
237243
};
238244

239245
} // namespace DB::S3

0 commit comments

Comments
 (0)