|
8 | 8 | #include "db_config.h" |
9 | 9 | #include "dbinc/db_swap.h" |
10 | 10 |
|
| 11 | +#include <time.h> |
| 12 | +#include <epochlib.h> |
| 13 | + |
11 | 14 | #ifndef lint |
12 | 15 | static const char revid[] = |
13 | 16 | "$Id: rep_record.c,v 1.193 2003/11/14 05:32:31 ubell Exp $"; |
@@ -1083,7 +1086,6 @@ __rep_verify_will_recover(dbenv, control, rec) |
1083 | 1086 | * PUBLIC: int __rep_process_message __P((DB_ENV *, DBT *, DBT *, char**, |
1084 | 1087 | * PUBLIC: DB_LSN *, uint32_t *,uint32_t *, char **, int)); |
1085 | 1088 | */ |
1086 | | - |
1087 | 1089 | int |
1088 | 1090 | __rep_process_message(dbenv, control, rec, eidp, ret_lsnp, commit_gen, newgen, newmaster, online) |
1089 | 1091 | DB_ENV *dbenv; |
@@ -1117,17 +1119,24 @@ __rep_process_message(dbenv, control, rec, eidp, ret_lsnp, commit_gen, newgen, n |
1117 | 1119 | static int rpm_pr = 0; |
1118 | 1120 | int rpm_now; |
1119 | 1121 | #endif |
| 1122 | + time_t now; |
1120 | 1123 | int send_count = 0; |
1121 | 1124 | static time_t verify_req_print = 0; |
1122 | 1125 | static unsigned long long verify_req_count = 0; |
1123 | 1126 | unsigned long long bytes_behind; |
1124 | | - time_t now; |
1125 | | - |
1126 | | - |
| 1127 | + time_t start_rep_all, end_rep_all; |
| 1128 | + int nlsns = 0; |
| 1129 | + DB_LSN rep_start_lsn; |
1127 | 1130 | u_int32_t vi_last_write_gen, vi_egen; |
1128 | 1131 | int vi_nsites, vi_priority, vi_tiebreaker; |
1129 | 1132 |
|
1130 | 1133 | char *master; |
| 1134 | + static time_t report_last = 0; |
| 1135 | + time_t report_now = time(NULL); |
| 1136 | + |
| 1137 | + if (report_last == 0) { |
| 1138 | + report_last = report_now; |
| 1139 | + } |
1131 | 1140 |
|
1132 | 1141 | PANIC_CHECK(dbenv); |
1133 | 1142 | ENV_REQUIRES_CONFIG(dbenv, dbenv->rep_handle, "rep_process_message", |
@@ -1156,6 +1165,17 @@ __rep_process_message(dbenv, control, rec, eidp, ret_lsnp, commit_gen, newgen, n |
1156 | 1165 | if (LOG_SWAPPED()) |
1157 | 1166 | __rep_control_swap(rp); |
1158 | 1167 |
|
| 1168 | + if (report_now != report_last && IS_REP_CLIENT(dbenv)) { |
| 1169 | + report_last = report_now; |
| 1170 | + if (dbenv->coherency_check_callback && dbenv->coherency_check_callback(dbenv->coherency_check_usrptr) == 0) { |
| 1171 | + logmsg(LOGMSG_USER, "gen %u dups %u queued %u ready at %u:%u rep at %u:%u last recv %u:%u\n", |
| 1172 | + rep->stat.st_gen, rep->stat.st_log_duplicated, rep->stat.st_log_queued, |
| 1173 | + lp->ready_lsn.file, lp->ready_lsn.offset, |
| 1174 | + lp->waiting_lsn.file, lp->waiting_lsn.offset, |
| 1175 | + rp->lsn.file, rp->lsn.offset); |
| 1176 | + } |
| 1177 | + } |
| 1178 | + |
1159 | 1179 | if (gbl_verbose_master_req) { |
1160 | 1180 | switch (rp->rectype) { |
1161 | 1181 | case REP_MASTER_REQ: |
@@ -1505,9 +1525,12 @@ __rep_process_message(dbenv, control, rec, eidp, ret_lsnp, commit_gen, newgen, n |
1505 | 1525 | flags = IS_ZERO_LSN(rp->lsn) || |
1506 | 1526 | IS_INIT_LSN(rp->lsn) ? DB_FIRST : DB_SET; |
1507 | 1527 | sendflags = DB_REP_SENDACK; |
| 1528 | + start_rep_all = comdb2_time_epochms(); |
| 1529 | + rep_start_lsn = lsn; |
1508 | 1530 | for (ret = __log_c_get(logc, &lsn, &data_dbt, flags); |
1509 | 1531 | ret == 0 && type != REP_LOG_MORE; |
1510 | 1532 | ret = __log_c_get(logc, &lsn, &data_dbt, DB_NEXT)) { |
| 1533 | + nlsns++; |
1511 | 1534 | /* |
1512 | 1535 | * When a log file changes, we'll have a real log |
1513 | 1536 | * record with some lsn [n][m], and we'll also want |
@@ -1581,6 +1604,8 @@ __rep_process_message(dbenv, control, rec, eidp, ret_lsnp, commit_gen, newgen, n |
1581 | 1604 | oldfilelsn = lsn; |
1582 | 1605 | oldfilelsn.offset += logc->c_len; |
1583 | 1606 | } |
| 1607 | + end_rep_all = comdb2_time_epochms(); |
| 1608 | + logmsg(LOGMSG_USER, "sent %d lsns in %d ms %u:%u to %u:%u\n", nlsns, (int) (end_rep_all - start_rep_all), rep_start_lsn.file, rep_start_lsn.offset, lsn.file, lsn.offset); |
1584 | 1609 |
|
1585 | 1610 | if (gbl_verbose_fills){ |
1586 | 1611 | logmsg(LOGMSG_USER, "%s line %d done REP_ALL fill for %s to " |
@@ -3031,6 +3056,8 @@ __thread int disable_random_deadlocks = 0; |
3031 | 3056 | __thread int physrep_out_of_order = 0; |
3032 | 3057 | __thread DB_LSN commit_lsn = {0}; |
3033 | 3058 |
|
| 3059 | +extern int gbl_always_request_log_req; |
| 3060 | + |
3034 | 3061 | /* |
3035 | 3062 | * __rep_apply -- |
3036 | 3063 | * |
@@ -3068,6 +3095,7 @@ __rep_apply_int(dbenv, rp, rec, ret_lsnp, commit_gen, decoupled) |
3068 | 3095 | int num_retries; |
3069 | 3096 | int disabled_minwrite_noread = 0; |
3070 | 3097 | char *eid, *dist_txnid = NULL; |
| 3098 | + time_t now = comdb2_time_epoch(); |
3071 | 3099 |
|
3072 | 3100 | db_rep = dbenv->rep_handle; |
3073 | 3101 | rep = db_rep->region; |
@@ -3149,7 +3177,12 @@ __rep_apply_int(dbenv, rp, rec, ret_lsnp, commit_gen, decoupled) |
3149 | 3177 | (void)count_in_func; |
3150 | 3178 | lp = dblp->reginfo.primary; |
3151 | 3179 | cmp = log_compare(&rp->lsn, &lp->ready_lsn); |
3152 | | - |
| 3180 | + if (now != lp->last_log_record_time) { |
| 3181 | + lp->last_log_record_time = now; |
| 3182 | + lp->records_last_second = 0; |
| 3183 | + } |
| 3184 | + if (cmp > 0) |
| 3185 | + lp->records_last_second++; |
3153 | 3186 | /* |
3154 | 3187 | * fprintf(stderr, "Rep log file %s line %d for %d:%d ready_lsn is %d:%d cmp=%d\n", |
3155 | 3188 | * __FILE__, __LINE__, rp->lsn.file, rp->lsn.offset, lp->ready_lsn.file, |
@@ -3430,8 +3463,14 @@ gap_check: max_lsn_dbtp = NULL; |
3430 | 3463 | */ |
3431 | 3464 | next_lsn = lp->ready_lsn; |
3432 | 3465 | do_req = ++lp->rcvd_recs >= lp->wait_recs; |
| 3466 | + /* We used to have an explicit do_req=1 here. Presumably this fixes a case |
| 3467 | + * where we haven't seen much traffic in the past, and get a few records |
| 3468 | + * but not enough to trigger a request. Unfortunately the back-and-forth |
| 3469 | + * requests this generates slow down catchup when the replicant is very far |
| 3470 | + * behind and has multiple gaps. Instead we request on a timer from elsewhere. */ |
3433 | 3471 |
|
3434 | | - do_req = 1; |
| 3472 | + if (gbl_always_request_log_req) |
| 3473 | + do_req = 1; |
3435 | 3474 |
|
3436 | 3475 | if (do_req) { |
3437 | 3476 | lp->wait_recs = rep->request_gap; |
@@ -3483,7 +3522,10 @@ gap_check: max_lsn_dbtp = NULL; |
3483 | 3522 | max_lsn_dbtp->data); |
3484 | 3523 | max_lsn_dbtp->data = &tmp_lsn; |
3485 | 3524 | } |
3486 | | - |
| 3525 | + else { |
| 3526 | + ZERO_LSN(tmp_lsn); |
| 3527 | + } |
| 3528 | + // fprintf(stderr, "Requesting %u:%u - %u:%u ready %u:%u waiting %u:%u\n", next_lsn.file, next_lsn.offset, tmp_lsn.file, tmp_lsn.offset, lp->ready_lsn.file, lp->ready_lsn.offset, lp->waiting_lsn.file, lp->waiting_lsn.offset); |
3487 | 3529 | /* |
3488 | 3530 | * fprintf(stderr, "Requesting file %s line %d lsn %d:%d\n", |
3489 | 3531 | * __FILE__, __LINE__, next_lsn.file, next_lsn.offset); |
@@ -8858,6 +8900,42 @@ __rep_inflight_txns_older_than_lsn(DB_ENV *dbenv, DB_LSN *lsn) |
8858 | 8900 | return 0; |
8859 | 8901 | } |
8860 | 8902 |
|
| 8903 | +// PUBLIC: int __dbenv_get_rep_lsns __P((DB_ENV *, DB_LSN *, DB_LSN *, int *)); |
| 8904 | +int /* always returns 0 */ |
| 8905 | +__dbenv_get_rep_lsns(dbenv, ready_lsn, gap_lsn, nrecs) /* Copy out the log region's ready LSN, waiting LSN and records_last_second counter while holding rep_mutexp. */ |
| 8906 | +DB_ENV *dbenv; /* in: environment; its rep_handle and lg_handle are dereferenced */ |
| 8907 | +DB_LSN *ready_lsn; /* out: receives lp->ready_lsn; dereferenced unconditionally, so must be non-NULL */ |
| 8908 | +DB_LSN *gap_lsn; /* out: receives lp->waiting_lsn; dereferenced unconditionally, so must be non-NULL */ |
| 8909 | +int *nrecs; /* out: receives lp->records_last_second; dereferenced unconditionally, so must be non-NULL */ |
| 8910 | +{ |
| 8911 | + DB_LOG *dblp; |
| 8912 | + LOG *lp; |
| 8913 | + DB_REP *db_rep; |
| 8914 | + |
| 8915 | + db_rep = dbenv->rep_handle; |
| 8916 | + dblp = dbenv->lg_handle; |
| 8917 | + lp = dblp->reginfo.primary; |
| 8918 | + /* NOTE(review): all three fields are read as one snapshot under rep_mutexp; confirm the writers of these LOG fields hold the same mutex. */ |
| 8919 | + MUTEX_LOCK(dbenv, db_rep->rep_mutexp); |
| 8920 | + *ready_lsn = lp->ready_lsn; |
| 8921 | + *gap_lsn = lp->waiting_lsn; /* reported to the caller as the "gap" LSN */ |
| 8922 | + *nrecs = lp->records_last_second; /* counter that __rep_apply_int zeroes when its epoch-second timestamp changes and bumps for each record whose LSN compares greater than ready_lsn */ |
| 8923 | + MUTEX_UNLOCK(dbenv, db_rep->rep_mutexp); |
| 8924 | + return 0; /* no failure path */ |
| 8925 | +} |
| 8926 | + |
| 8927 | +// PUBLIC: int __dbenv_set_coherency_check_callback __P((DB_ENV *, int(*)(void*), void*)); |
| 8928 | +int /* always returns 0 */ |
| 8929 | +__dbenv_set_coherency_check_callback(dbenv, callback, usrptr) /* Record a coherency-check callback and its opaque argument on the environment. */ |
| 8930 | +DB_ENV *dbenv; /* in: environment whose coherency_check_* fields are overwritten */ |
| 8931 | +int (*callback)(void*); /* in: stored as-is; __rep_process_message only calls it when non-NULL and logs a replication status line when it returns 0 */ |
| 8932 | +void *usrptr; /* in: stored as-is; passed back as the callback's sole argument */ |
| 8933 | +{ |
| 8934 | + dbenv->coherency_check_callback = callback; /* NOTE(review): both fields are written without a lock; presumably set once at startup before replication messages flow — confirm */ |
| 8935 | + dbenv->coherency_check_usrptr = usrptr; |
| 8936 | + return 0; /* no failure path */ |
| 8937 | +} |
| 8938 | + |
8861 | 8939 | /* Not crazy about leaving this here. This is used in bdb and berkdb. It's |
8862 | 8940 | * initialized in db, early in main. It doesn't really belong in any one place. */ |
8863 | 8941 | char *db_eid_broadcast = NULL; |
|
0 commit comments