Skip to content

Commit 92e3374

Browse files
WANG Weinanweinan003
authored andcommitted
Feat: Enable hot DR cluster
Most of the feature is already implemented upstream, but the hot DR QD cannot organize cdbcomponent from the `gp_segment_configuration` relation. Define a boolean GUC named `hot_dr`; if `hot_dr` is enabled, read the cluster information from the segconf file.
1 parent 44a9f90 commit 92e3374

File tree

10 files changed

+80
-10
lines changed

10 files changed

+80
-10
lines changed

.github/workflows/build-cloudberry.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ jobs:
310310
{"test":"ic-isolation2",
311311
"make_configs":["src/test/isolation2:installcheck-isolation2"]
312312
},
313+
{"test":"ic-isolation2-hot-standby",
314+
"make_configs":["src/test/isolation2:installcheck-hot-standby"]
315+
},
313316
{"test":"ic-isolation2-crash",
314317
"make_configs":["src/test/isolation2:installcheck-isolation2-crash"],
315318
"enable_core_check":false

src/backend/access/transam/xlog.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ int XLogArchiveTimeout = 0;
114114
int XLogArchiveMode = ARCHIVE_MODE_OFF;
115115
char *XLogArchiveCommand = NULL;
116116
bool EnableHotStandby = false;
117+
bool EnableHotDR = false;
117118
bool fullPageWrites = true;
118119
bool wal_log_hints = false;
119120
bool wal_compression = false;
@@ -7967,6 +7968,12 @@ StartupXLOG(void)
79677968
if (gp_pause_on_restore_point_replay)
79687969
pauseRecoveryOnRestorePoint(xlogreader);
79697970

7971+
/* Exit the recovery loop if a promotion is triggered in pauseRecoveryOnRestorePoint() */
7972+
if (reachedContinuousRecoveryTarget && recoveryTargetAction == RECOVERY_TARGET_ACTION_PROMOTE){
7973+
reachedRecoveryTarget = true;
7974+
break;
7975+
}
7976+
79707977
/* Exit loop if we reached inclusive recovery target */
79717978
if (recoveryStopsAfter(xlogreader))
79727979
{
@@ -10757,6 +10764,9 @@ XLogRestorePoint(const char *rpName)
1075710764
xlrec.rp_time = GetCurrentTimestamp();
1075810765
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
1075910766

10767+
/* LogHotStandby for the restore here */
10768+
LogStandbySnapshot();
10769+
1076010770
XLogBeginInsert();
1076110771
XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
1076210772

src/backend/cdb/cdbutil.c

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ static int CdbComponentDatabaseInfoCompare(const void *p1, const void *p2);
9292

9393
static GpSegConfigEntry * readGpSegConfigFromCatalog(int *total_dbs);
9494
static GpSegConfigEntry * readGpSegConfigFromFTSFiles(int *total_dbs);
95+
static GpSegConfigEntry * readGpSegConfigFromFiles(int *total_dbs);
9596

9697
static void getAddressesForDBid(GpSegConfigEntry *c, int elevel);
9798
static HTAB *hostPrimaryCountHashTableInit(void);
@@ -131,6 +132,15 @@ typedef struct HostPrimaryCountEntry
131132
*/
132133
static GpSegConfigEntry *
readGpSegConfigFromFTSFiles(int *total_dbs)
{
	GpSegConfigEntry *entries;

	/*
	 * This path is only valid outside a transaction and when the cluster is
	 * not running as a hot DR cluster.
	 */
	Assert(!IsTransactionState() && !IS_HOT_DR_CLUSTER());

	/* Ask FTS for a probe so the dump file is refreshed before we read it. */
	FtsNotifyProber();

	/* The actual parsing is shared with the hot DR path. */
	entries = readGpSegConfigFromFiles(total_dbs);

	return entries;
}
141+
142+
static GpSegConfigEntry *
143+
readGpSegConfigFromFiles(int *total_dbs)
134144
{
135145
FILE *fd;
136146
int idx = 0;
@@ -142,11 +152,6 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
142152
char address[MAXHOSTNAMELEN];
143153
char buf[MAXHOSTNAMELEN * 2 + 32];
144154

145-
Assert(!IsTransactionState());
146-
147-
/* notify and wait FTS to finish a probe and update the dump file */
148-
FtsNotifyProber();
149-
150155
fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");
151156

152157
if (!fd)
@@ -188,6 +193,18 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
188193
return configs;
189194
}
190195

196+
bool
197+
checkGpSegConfigFtsFiles()
198+
{
199+
FILE *fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");
200+
201+
if (!fd)
202+
return false;
203+
204+
FreeFile(fd);
205+
return true;
206+
}
207+
191208
/*
192209
* writeGpSegConfigToFTSFiles() dump gp_segment_configuration to the file
193210
* GPSEGCONFIGDUMPFILE, in $PGDATA, only FTS process can use this function.
@@ -372,10 +389,17 @@ getCdbComponentInfo(void)
372389

373390
HTAB *hostPrimaryCountHash = hostPrimaryCountHashTableInit();
374391

375-
if (IsTransactionState())
376-
configs = readGpSegConfigFromCatalog(&total_dbs);
392+
if (EnableHotDR)
393+
{
394+
configs = readGpSegConfigFromFiles(&total_dbs);
395+
}
377396
else
378-
configs = readGpSegConfigFromFTSFiles(&total_dbs);
397+
{
398+
if (IsTransactionState())
399+
configs = readGpSegConfigFromCatalog(&total_dbs);
400+
else
401+
configs = readGpSegConfigFromFTSFiles(&total_dbs);
402+
}
379403

380404
component_databases = palloc0(sizeof(CdbComponentDatabases));
381405

src/backend/utils/misc/guc_gp.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ static bool check_optimizer(bool *newval, void **extra, GucSource source);
8686
static bool check_verify_gpfdists_cert(bool *newval, void **extra, GucSource source);
8787
static bool check_dispatch_log_stats(bool *newval, void **extra, GucSource source);
8888
static bool check_gp_workfile_compression(bool *newval, void **extra, GucSource source);
89+
static bool check_hot_dr(bool *newval, void **extra, GucSource source);
8990

9091
/* Helper function for guc setter */
9192
bool gpvars_check_gp_resqueue_priority_default_value(char **newval,
@@ -3331,6 +3332,16 @@ struct config_bool ConfigureNamesBool_gp[] =
33313332
NULL, NULL, NULL
33323333
},
33333334

3335+
{
3336+
{"hot_dr", PGC_POSTMASTER, REPLICATION_STANDBY,
3337+
gettext_noop("DR Cluster as well as allows connteions and queries"),
3338+
NULL
3339+
},
3340+
&EnableHotDR,
3341+
false,
3342+
check_hot_dr, NULL, NULL
3343+
},
3344+
33343345
{
33353346
{"gp_enable_runtime_filter_pushdown", PGC_USERSET, DEVELOPER_OPTIONS,
33363347
gettext_noop("Try to push the hash table of hash join to the seqscan or AM as bloom filter."),
@@ -5455,6 +5466,22 @@ check_verify_gpfdists_cert(bool *newval, void **extra, GucSource source)
54555466
return true;
54565467
}
54575468

5469+
static bool
5470+
check_hot_dr(bool *newval, void **extra, GucSource source)
5471+
{
5472+
if (*newval && !EnableHotStandby)
5473+
ereport(ERROR,
5474+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5475+
errmsg("cannot enable \"hot_dr\" when \"hot_standby\" is false")));
5476+
5477+
if (*newval && IS_QUERY_DISPATCHER() && !checkGpSegConfigFtsFiles())
5478+
ereport(ERROR,
5479+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5480+
errmsg("cannot enable \"hot_dr\" since DR cluster segment configuration file does not exits")));
5481+
5482+
return true;
5483+
}
5484+
54585485
static bool
54595486
check_dispatch_log_stats(bool *newval, void **extra, GucSource source)
54605487
{

src/include/access/xlog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ extern int XLogArchiveTimeout;
123123
extern int wal_retrieve_retry_interval;
124124
extern char *XLogArchiveCommand;
125125
extern bool EnableHotStandby;
126+
extern bool EnableHotDR;
126127

127128
extern bool fullPageWrites;
128129
extern bool wal_log_hints;

src/include/cdb/cdbutil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ extern char *getDnsAddress(char *name, int port, int elevel);
132132

133133
#ifdef USE_INTERNAL_FTS
134134
extern void writeGpSegConfigToFTSFiles(void);
135+
extern bool checkGpSegConfigFtsFiles(void);
135136
#else
136137

137138
GpSegConfigEntry * readGpSegConfig(char * buff, int *total_dbs);

src/include/cdb/cdbvars.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ extern GpId GpIdentity;
757757
#define MAX_DBID_STRING_LENGTH 11
758758

759759
#define UNINITIALIZED_GP_IDENTITY_VALUE (-10000)
760+
#define IS_HOT_DR_CLUSTER() (EnableHotDR)
760761
#define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID)
761762
#define IS_HOT_STANDBY_QD() (EnableHotStandby && IS_QUERY_DISPATCHER() && RecoveryInProgress())
762763

src/include/utils/unsync_guc_name.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@
294294
"gp_workfile_limit_per_segment",
295295
"gp_workfile_max_entries",
296296
"hba_file",
297+
"hot_dr",
297298
"hot_standby",
298299
"hot_standby_feedback",
299300
"huge_pages",

src/test/isolation2/expected/hot_standby/faults.out

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,15 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_
133133
ERROR: primary segments can only process MPP protocol messages from primary QD (seg1 slice1 127.0.1.1:7006 pid=14671)
134134
HINT: Exit the current session and re-connect.
135135
-1Sq: ... <quitting>
136-
136+
-- start_ignore
137137
-- will fail due to downed mirror (previous primary)
138138
-1S: select * from hs_failover;
139139
ERROR: failed to acquire resources on one or more segments
140140
DETAIL: connection to server at "10.13.9.74", port 7003 failed: Connection refused
141141
Is the server running on that host and accepting TCP/IP connections?
142142
(seg1 10.13.9.74:7003)
143143
-1Sq: ... <quitting>
144+
-- end_ignore
144145

145146
-- bring the downed mirror up
146147
!\retcode gprecoverseg -aF;

src/test/isolation2/sql/hot_standby/faults.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,11 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_
5959
-- in an existing gang. That mirror is now a primary, so it will complain and the query fails.
6060
-1S: select * from hs_failover;
6161
-1Sq:
62-
62+
-- start_ignore
6363
-- will fail due to downed mirror (previous primary)
6464
-1S: select * from hs_failover;
6565
-1Sq:
66+
-- end_ignore
6667

6768
-- bring the downed mirror up
6869
!\retcode gprecoverseg -aF;

0 commit comments

Comments
 (0)