@@ -18,6 +18,7 @@ use super::datastore::{
18
18
} ;
19
19
use super :: db_metrics:: DB_METRICS ;
20
20
use crate :: db:: datastore:: system_tables:: StModuleRow ;
21
+ use crate :: db:: MetricsRecorderQueue ;
21
22
use crate :: error:: { DBError , DatabaseError , RestoreSnapshotError } ;
22
23
use crate :: execution_context:: { ReducerContext , Workload , WorkloadType } ;
23
24
use crate :: messages:: control_db:: HostType ;
@@ -110,14 +111,17 @@ pub struct RelationalDB {
110
111
/// `Some` if `durability` is `Some`, `None` otherwise.
111
112
disk_size_fn : Option < DiskSizeFn > ,
112
113
114
+ /// A map from workload types to their cached prometheus counters.
115
+ workload_type_to_exec_counters : Arc < EnumMap < WorkloadType , ExecutionCounters > > ,
116
+
117
+ /// An async queue for recording transaction metrics off the main thread
118
+ metrics_recorder_queue : Option < MetricsRecorderQueue > ,
119
+
113
120
// DO NOT ADD FIELDS AFTER THIS.
114
121
// By default, fields are dropped in declaration order.
115
122
// We want to release the file lock last.
116
123
// TODO(noa): is this lockfile still necessary now that we have data-dir?
117
124
_lock : LockFile ,
118
-
119
- /// A map from workload types to their cached prometheus counters.
120
- workload_type_to_exec_counters : Arc < EnumMap < WorkloadType , ExecutionCounters > > ,
121
125
}
122
126
123
127
#[ derive( Clone ) ]
@@ -231,6 +235,7 @@ impl RelationalDB {
231
235
inner : Locking ,
232
236
durability : Option < ( Arc < Durability > , DiskSizeFn ) > ,
233
237
snapshot_repo : Option < Arc < SnapshotRepository > > ,
238
+ metrics_recorder_queue : Option < MetricsRecorderQueue > ,
234
239
) -> Self {
235
240
let ( durability, disk_size_fn) = durability. unzip ( ) ;
236
241
let snapshot_worker =
@@ -249,8 +254,10 @@ impl RelationalDB {
249
254
row_count_fn : default_row_count_fn ( database_identity) ,
250
255
disk_size_fn,
251
256
252
- _lock : lock,
253
257
workload_type_to_exec_counters,
258
+ metrics_recorder_queue,
259
+
260
+ _lock : lock,
254
261
}
255
262
}
256
263
@@ -324,6 +331,10 @@ impl RelationalDB {
324
331
/// If restoring from an existing database, the `snapshot_repo` must
325
332
/// store views of the same sequence of TXes as the `history`.
326
333
///
334
+ /// - `metrics_recorder_queue`
335
+ ///
336
+ /// The send side of a queue for recording transaction metrics.
337
+ ///
327
338
/// # Return values
328
339
///
329
340
/// Alongside `Self`, [`ConnectedClients`] is returned, which is the set of
@@ -333,13 +344,15 @@ impl RelationalDB {
333
344
/// gracefully. The caller is responsible for disconnecting the clients.
334
345
///
335
346
/// [ModuleHost]: crate::host::module_host::ModuleHost
347
+ #[ allow( clippy:: too_many_arguments) ]
336
348
pub fn open (
337
349
root : & ReplicaDir ,
338
350
database_identity : Identity ,
339
351
owner_identity : Identity ,
340
352
history : impl durability:: History < TxData = Txdata > ,
341
353
durability : Option < ( Arc < Durability > , DiskSizeFn ) > ,
342
354
snapshot_repo : Option < Arc < SnapshotRepository > > ,
355
+ metrics_recorder_queue : Option < MetricsRecorderQueue > ,
343
356
page_pool : PagePool ,
344
357
) -> Result < ( Self , ConnectedClients ) , DBError > {
345
358
log:: trace!( "[{}] DATABASE: OPEN" , database_identity) ;
@@ -373,6 +386,7 @@ impl RelationalDB {
373
386
inner,
374
387
durability,
375
388
snapshot_repo,
389
+ metrics_recorder_queue,
376
390
) ;
377
391
378
392
if let Some ( meta) = db. metadata ( ) ? {
@@ -749,6 +763,11 @@ impl RelationalDB {
749
763
Ok ( AlgebraicValue :: decode ( col_ty, & mut & * bytes) ?)
750
764
}
751
765
766
+ /// Returns the execution counters for this database.
767
+ pub fn exec_counter_map ( & self ) -> Arc < EnumMap < WorkloadType , ExecutionCounters > > {
768
+ self . workload_type_to_exec_counters . clone ( )
769
+ }
770
+
752
771
/// Returns the execution counters for `workload_type` for this database.
753
772
pub fn exec_counters_for ( & self , workload_type : WorkloadType ) -> & ExecutionCounters {
754
773
& self . workload_type_to_exec_counters [ workload_type]
@@ -988,7 +1007,7 @@ impl RelationalDB {
988
1007
let mut tx = self . begin_tx ( workload) ;
989
1008
let res = f ( & mut tx) ;
990
1009
let ( tx_metrics, reducer) = self . release_tx ( tx) ;
991
- self . report_tx_metricses ( & reducer, None , None , & tx_metrics) ;
1010
+ self . report_read_tx_metrics ( reducer, tx_metrics) ;
992
1011
res
993
1012
}
994
1013
@@ -999,11 +1018,11 @@ impl RelationalDB {
999
1018
{
1000
1019
if res. is_err ( ) {
1001
1020
let ( tx_metrics, reducer) = self . rollback_mut_tx ( tx) ;
1002
- self . report ( & reducer, & tx_metrics, None ) ;
1021
+ self . report_mut_tx_metrics ( reducer, tx_metrics, None ) ;
1003
1022
} else {
1004
1023
match self . commit_tx ( tx) . map_err ( E :: from) ? {
1005
1024
Some ( ( tx_data, tx_metrics, reducer) ) => {
1006
- self . report ( & reducer, & tx_metrics, Some ( & tx_data) ) ;
1025
+ self . report_mut_tx_metrics ( reducer, tx_metrics, Some ( tx_data) ) ;
1007
1026
}
1008
1027
None => panic ! ( "TODO: retry?" ) ,
1009
1028
}
@@ -1018,7 +1037,7 @@ impl RelationalDB {
1018
1037
match res {
1019
1038
Err ( e) => {
1020
1039
let ( tx_metrics, reducer) = self . rollback_mut_tx ( tx) ;
1021
- self . report ( & reducer, & tx_metrics, None ) ;
1040
+ self . report_mut_tx_metrics ( reducer, tx_metrics, None ) ;
1022
1041
1023
1042
Err ( e)
1024
1043
}
@@ -1042,17 +1061,22 @@ impl RelationalDB {
1042
1061
/// Reports the `TxMetrics`s passed.
1043
1062
///
1044
1063
/// Should only be called after the tx lock has been fully released.
1045
- pub ( crate ) fn report_tx_metricses (
1064
+ pub ( crate ) fn report_tx_metrics (
1046
1065
& self ,
1047
- reducer : & str ,
1048
- tx_data : Option < & TxData > ,
1049
- metrics_mut : Option < & TxMetrics > ,
1050
- metrics_read : & TxMetrics ,
1066
+ reducer : String ,
1067
+ tx_data : Option < Arc < TxData > > ,
1068
+ metrics_for_writer : Option < TxMetrics > ,
1069
+ metrics_for_reader : Option < TxMetrics > ,
1051
1070
) {
1052
- if let Some ( metrics_mut) = metrics_mut {
1053
- self . report ( reducer, metrics_mut, tx_data) ;
1071
+ if let Some ( recorder) = & self . metrics_recorder_queue {
1072
+ recorder. send_metrics (
1073
+ reducer,
1074
+ metrics_for_writer,
1075
+ metrics_for_reader,
1076
+ tx_data,
1077
+ self . exec_counter_map ( ) ,
1078
+ ) ;
1054
1079
}
1055
- self . report ( reducer, metrics_read, None ) ;
1056
1080
}
1057
1081
}
1058
1082
@@ -1403,8 +1427,13 @@ impl RelationalDB {
1403
1427
}
1404
1428
1405
1429
/// Reports the metrics for `reducer`, using counters provided by `db`.
1406
- pub fn report ( & self , reducer : & str , metrics : & TxMetrics , tx_data : Option < & TxData > ) {
1407
- metrics. report ( tx_data, reducer, |wl : WorkloadType | self . exec_counters_for ( wl) ) ;
1430
+ pub fn report_mut_tx_metrics ( & self , reducer : String , metrics : TxMetrics , tx_data : Option < TxData > ) {
1431
+ self . report_tx_metrics ( reducer, tx_data. map ( Arc :: new) , Some ( metrics) , None ) ;
1432
+ }
1433
+
1434
+ /// Reports subscription metrics for `reducer`, using counters provided by `db`.
1435
+ pub fn report_read_tx_metrics ( & self , reducer : String , metrics : TxMetrics ) {
1436
+ self . report_tx_metrics ( reducer, None , None , Some ( metrics) ) ;
1408
1437
}
1409
1438
1410
1439
/// Read the value of [ST_VARNAME_ROW_LIMIT] from `st_var`
@@ -1779,7 +1808,7 @@ pub mod tests_utils {
1779
1808
expected_num_clients : usize ,
1780
1809
) -> Result < Self , DBError > {
1781
1810
let dir = TempReplicaDir :: new ( ) ?;
1782
- let db = Self :: open_db ( & dir, history, None , None , expected_num_clients) ?;
1811
+ let db = Self :: open_db ( & dir, history, None , None , None , expected_num_clients) ?;
1783
1812
Ok ( Self {
1784
1813
db,
1785
1814
durable : None ,
@@ -1870,7 +1899,7 @@ pub mod tests_utils {
1870
1899
}
1871
1900
1872
1901
fn in_memory_internal ( root : & ReplicaDir ) -> Result < RelationalDB , DBError > {
1873
- Self :: open_db ( root, EmptyHistory :: new ( ) , None , None , 0 )
1902
+ Self :: open_db ( root, EmptyHistory :: new ( ) , None , None , None , 0 )
1874
1903
}
1875
1904
1876
1905
fn durable_internal (
@@ -1884,7 +1913,7 @@ pub mod tests_utils {
1884
1913
let snapshot_repo = want_snapshot_repo
1885
1914
. then ( || open_snapshot_repo ( root. snapshots ( ) , Identity :: ZERO , 0 ) )
1886
1915
. transpose ( ) ?;
1887
- let db = Self :: open_db ( root, history, Some ( ( durability, disk_size_fn) ) , snapshot_repo, 0 ) ?;
1916
+ let db = Self :: open_db ( root, history, Some ( ( durability, disk_size_fn) ) , snapshot_repo, None , 0 ) ?;
1888
1917
1889
1918
Ok ( ( db, local) )
1890
1919
}
@@ -1894,6 +1923,7 @@ pub mod tests_utils {
1894
1923
history : impl durability:: History < TxData = Txdata > ,
1895
1924
durability : Option < ( Arc < Durability > , DiskSizeFn ) > ,
1896
1925
snapshot_repo : Option < Arc < SnapshotRepository > > ,
1926
+ metrics_recorder_queue : Option < MetricsRecorderQueue > ,
1897
1927
expected_num_clients : usize ,
1898
1928
) -> Result < RelationalDB , DBError > {
1899
1929
let ( db, connected_clients) = RelationalDB :: open (
@@ -1903,6 +1933,7 @@ pub mod tests_utils {
1903
1933
history,
1904
1934
durability,
1905
1935
snapshot_repo,
1936
+ metrics_recorder_queue,
1906
1937
PagePool :: new_for_test ( ) ,
1907
1938
) ?;
1908
1939
assert_eq ! ( connected_clients. len( ) , expected_num_clients) ;
@@ -2151,6 +2182,7 @@ mod tests {
2151
2182
EmptyHistory :: new ( ) ,
2152
2183
None ,
2153
2184
None ,
2185
+ None ,
2154
2186
PagePool :: new_for_test ( ) ,
2155
2187
) {
2156
2188
Ok ( _) => {
0 commit comments