diff --git a/.abi-check/7.1.0/postgres.symbols.ignore b/.abi-check/7.1.0/postgres.symbols.ignore index 848dbf2841d..d42d77c4039 100644 --- a/.abi-check/7.1.0/postgres.symbols.ignore +++ b/.abi-check/7.1.0/postgres.symbols.ignore @@ -1 +1,12 @@ pgarch_start +ConfigureNamesInt_gp +child_triggers +has_update_triggers +ConfigureNamesBool_gp +aocs_beginscan +AppendOnlyBlockDirectory_GetEntry +ConfigureNamesString_gp +gp_pause_on_restore_point_replay +ConfigureNamesReal_gp +TableAmRoutine +MainLWLockNames diff --git a/GNUmakefile.in b/GNUmakefile.in index e6333e39bec..7b44f13dbf5 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -212,6 +212,11 @@ installcheck-gpcheckcat: $(call recurse,installcheck-world,gpcontrib/gp_replica_check,installcheck) $(call recurse,installcheck-world,src/bin/pg_upgrade,check) +.PHONY: installcheck-hot-standby +installcheck-hot-standby: submake-generated-headers + $(MAKE) -C src/test/regress installcheck-hot-standby + $(MAKE) -C src/test/isolation2 installcheck-hot-standby + # Run mock tests, that don't require a running server. Arguably these should # be part of [install]check-world, but we treat them more like part of # compilation than regression testing, in the CI. But they are too heavy-weight diff --git a/gpMgmt/bin/gpstart b/gpMgmt/bin/gpstart index 6937d86ac51..2ce947885d5 100755 --- a/gpMgmt/bin/gpstart +++ b/gpMgmt/bin/gpstart @@ -55,7 +55,8 @@ class GpStart: skip_heap_checksum_validation=False, fts_hosts=None, etcd_hosts=None, - is_external_fts=False + is_external_fts=False, + segment_config_file=None ): assert (specialMode in [None, 'maintenance']) self.specialMode = specialMode @@ -78,6 +79,7 @@ class GpStart: self.etcd_hosts = etcd_hosts self.is_external_fts = is_external_fts self.singlenodemode = False + self.segment_config_file = segment_config_file # # Some variables that are set during execution @@ -510,7 +512,11 @@ class GpStart: logger.info("Obtaining Segment details from coordinator...") self.dburl = dbconn.DbURL(port=self.port, dbname='template1') - self.gparray = GpArray.initFromCatalog(self.dburl, utility=True) + if self.segment_config_file: + self.gparray = GpArray.initFromFile(self.segment_config_file) + self.gparray.is_singlenode= False + else: + self.gparray = GpArray.initFromCatalog(self.dburl, utility=True) logger.info("Setting new coordinator era") e = GpEraFile(self.coordinator_datadir, logger=get_logger_if_verbose()) @@ -876,6 +882,8 @@ class GpStart: addTo.add_option('-E', dest='etcd_hosts', type='string',default=None , help='specify the file that contains all etcd hosts.If this argument is set, `gpstart` will attempt' 'to start all etcd in the specified hosts') + addTo.add_option('-f', '--segment_config_file', dest='segment_config_file', type='string', default=None, + help='specify the gp_segment_configuration file to load for this cluster') parser.set_defaults(verbose=False, filters=[], slice=(None, None)) @@ -922,7 +930,8 @@ class GpStart: skip_heap_checksum_validation=options.skip_heap_checksum_validation, fts_hosts=options.fts_hosts, etcd_hosts=options.etcd_hosts, - is_external_fts=external_fts + is_external_fts=external_fts, + segment_config_file=options.segment_config_file ) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 048ce9231a9..0003425b79f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -530,6 +530,14 @@ heapgetpage(TableScanDesc sscan, BlockNumber page) LockBuffer(buffer, BUFFER_LOCK_UNLOCK); +#ifdef FAULT_INJECTOR + 
FaultInjector_InjectFaultIfSet(
+		"heapgetpage_after_unlock_buffer",
+		DDLNotSpecified,
+		"",		/* databaseName */
+		RelationGetRelationName(scan->rs_base.rs_rd)); /* tableName */
+#endif
+
 	Assert(ntup <= MaxHeapTuplesPerPage);
 	scan->rs_ntuples = ntup;
 }
diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c
index 01ee7ac6d2c..899c621b240 100644
--- a/src/backend/access/rmgrdesc/standbydesc.c
+++ b/src/backend/access/rmgrdesc/standbydesc.c
@@ -66,6 +66,14 @@ standby_desc(StringInfo buf, XLogReaderState *record)
 				 xlrec->dbId, xlrec->tsId,
 				 xlrec->relcacheInitFileInval);
 	}
+	else if (info == XLOG_LATESTCOMPLETED_GXID)
+	{
+		DistributedTransactionId gxid;
+
+		gxid = *((DistributedTransactionId *) rec);
+		appendStringInfo(buf, UINT64_FORMAT, gxid);
+	}
+
 }
 
 const char *
@@ -84,6 +92,9 @@ standby_identify(uint8 info)
 		case XLOG_INVALIDATIONS:
 			id = "INVALIDATIONS";
 			break;
+		case XLOG_LATESTCOMPLETED_GXID:
+			id = "XLOG_LATESTCOMPLETED_GXID";
+			break;
 	}
 
 	return id;
diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README
index f3112ff3070..efac0cb505e 100644
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@@ -897,3 +897,48 @@ yet simplifies emulation of subtransactions considerably.
 
 Further details on locking mechanics in recovery are given in comments
 with the Lock rmgr code.
+
+Distributed Transaction Emulation during Recovery
+-------------------------------------------------
+
+In GPDB, the MVCC snapshot also includes distributed transactions (aka dtx).
+Accordingly, on a hot standby we also emulate running dtx. The way to do that
+is to re-use the shmCommittedGxidArray which has been used on a primary for dtx
+recovery: it tracks all the 2PC dtx that have their PREPARE phase done,
+but for which the COMMIT phase hasn't finished (i.e. window between the
+XLOG_XACT_DISTRIBUTED_COMMIT record being written and the
+XLOG_XACT_DISTRIBUTED_FORGET record being written on the QD). On a hot standby,
+any dtx shown in that array is regarded as in-progress. The MVCC snapshot does
+not really need to account for dtx not in that array: for a dtx that hasn't
+done PREPARE, we know no segment has committed any data yet; for a dtx that
+has finished the COMMIT phase, we know all segments have committed their data.
+
+Note: dtxes that are preparing will not be tracked in this array, and thus will
+not be included in this snapshot. This is slightly different from a primary QD,
+where such transactions would have been included in the distributed snapshot's
+inProgressXidArray (as we construct the inProgressXidArray from the PGXACTs that
+would contain the dummy entries for prepared transactions). However, as
+mentioned in CreateDistributedSnapshot, including these is not a requirement for
+correctness.
+
+Note: aborted/aborting dtxes are not accounted for by the standby either. Those
+are the dtxes that encountered an error while preparing. As with the previous
+point, the standby does not need to be aware of them for correctness. It is also
+worth noting that if a dtx encounters an error after being prepared, it cannot
+be aborted anymore and must be committed by the dtx recovery process. Until
+committed, such a dtx will be seen as in-progress by the standby.
+
+For 1PC dtx, however, there is a known limitation where the hot standby won't
+see the last 1PC (or the last few 1PCs if they are all 1PC). This is because
+a 1PC dtx does not write any WAL on the QD, so the standby QD won't advance its
+latestCompletedGxid, and its distributed snapshot horizon does not include the
+last 1PC - it would view the last 1PC as not yet started, or at best as still
+in progress. Only when another 2PC arrives does the standby advance its
+latestCompletedGxid so that its distributed snapshot includes the previous 1PC.
+
+We don't emulate the full architecture of "running transaction" for dtx because
+that is unnecessary, at least at the moment. For example, we don't create a
+dtx-version of XLOG_RUNNING_XACTS, because we already have that information as
+part of the extended checkpoint (see TMGXACT_CHECKPOINT). We also don't need to
+emulate other members in RunningTransactionsData, like subxid or xid-pruning
+related variables because those do not apply to dtx.
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index f3f2a035281..ed655baf989 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2475,11 +2475,10 @@ StartTransaction(void)
 	/*
 	 * Transactions may be started while recovery is in progress, if
-	 * hot standby is enabled. This mode is not supported in
-	 * Cloudberry yet.
+	 * hot standby is enabled.
 	 */
 	AssertImply(DistributedTransactionContext != DTX_CONTEXT_LOCAL_ONLY,
-				!s->startedInRecovery);
+				EnableHotStandby || !s->startedInRecovery);
 
 	/*
 	 * MPP Modification
 	 *
@@ -2526,20 +2525,39 @@ StartTransaction(void)
 
 		case DTX_CONTEXT_QE_TWO_PHASE_EXPLICIT_WRITER:
 		case DTX_CONTEXT_QE_TWO_PHASE_IMPLICIT_WRITER:
+			/*
+			 * Sanity check for the global xid.
+			 *
+			 * Note for hot standby dispatch: the standby QEs are still
+			 * writers, just like primary QEs for SELECT queries. But
+			 * hot standby dispatch never has a valid gxid, so we skip
+			 * the gxid checks for the standby QEs.
+			 */
+			if (!IS_HOT_STANDBY_QE())
+			{
+				if (QEDtxContextInfo.distributedXid == InvalidDistributedTransactionId)
+					elog(ERROR,
+						 "distributed transaction id is invalid in context %s",
+						 DtxContextToString(DistributedTransactionContext));
+
+				/*
+				 * Update distributed XID info, this is only used for
+				 * debugging.
+				 */
+				LocalDistribXactData *ele = &MyProc->localDistribXactData;
+				ele->distribXid = QEDtxContextInfo.distributedXid;
+				ele->state = LOCALDISTRIBXACT_STATE_ACTIVE;
+			}
+			else
+				Assert(QEDtxContextInfo.distributedXid == InvalidDistributedTransactionId);
+
+			/* fall through */
 		case DTX_CONTEXT_QE_AUTO_COMMIT_IMPLICIT:
 			{
 				/* If we're running in test-mode insert a delay in writer. */
 				if (gp_enable_slow_writer_testmode)
 					pg_usleep(500000);
 
-				if (DistributedTransactionContext != DTX_CONTEXT_QE_AUTO_COMMIT_IMPLICIT &&
-					QEDtxContextInfo.distributedXid == InvalidDistributedTransactionId)
-				{
-					elog(ERROR,
-						 "distributed transaction id is invalid in context %s",
-						 DtxContextToString(DistributedTransactionContext));
-				}
-
 				/*
 				 * Snapshot must not be created before setting transaction
 				 * isolation level.
@@ -2552,28 +2570,14 @@ StartTransaction(void)
 				XactReadOnly = isMppTxOptions_ReadOnly(
 					QEDtxContextInfo.distributedTxnOptions);
 
+				/* a hot standby transaction must be read-only */
+				AssertImply(IS_HOT_STANDBY_QE(), XactReadOnly);
+
 				/*
 				 * MPP: we're a QE Writer.
*/ MyTmGxact->gxid = QEDtxContextInfo.distributedXid; - if (DistributedTransactionContext == - DTX_CONTEXT_QE_TWO_PHASE_EXPLICIT_WRITER || - DistributedTransactionContext == - DTX_CONTEXT_QE_TWO_PHASE_IMPLICIT_WRITER) - { - Assert(QEDtxContextInfo.distributedXid != - InvalidDistributedTransactionId); - - /* - * Update distributed XID info, this is only used for - * debugging. - */ - LocalDistribXactData *ele = &MyProc->localDistribXactData; - ele->distribXid = QEDtxContextInfo.distributedXid; - ele->state = LOCALDISTRIBXACT_STATE_ACTIVE; - } - if (SharedLocalSnapshotSlot != NULL) { LWLockAcquire(SharedLocalSnapshotSlot->slotLock, LW_EXCLUSIVE); @@ -6880,8 +6884,8 @@ XactLogCommitRecord(TimestampTz commit_time, xl_xact_distrib xl_distrib; xl_xact_deldbs xl_deldbs; XLogRecPtr recptr; - bool isOnePhaseQE = (Gp_role == GP_ROLE_EXECUTE && MyTmGxactLocal->isOnePhaseCommit); bool isDtxPrepared = isPreparedDtxTransaction(); + DistributedTransactionId distrib_xid = getDistributedTransactionId(); uint8 info; @@ -6971,10 +6975,11 @@ XactLogCommitRecord(TimestampTz commit_time, xl_origin.origin_timestamp = replorigin_session_origin_timestamp; } - if (isDtxPrepared || isOnePhaseQE) + /* include distributed xid if there's one */ + if (distrib_xid != InvalidDistributedTransactionId) { xl_xinfo.xinfo |= XACT_XINFO_HAS_DISTRIB; - xl_distrib.distrib_xid = getDistributedTransactionId(); + xl_distrib.distrib_xid = distrib_xid; } #if 0 diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3fb9f121b93..be73d8fae2d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -133,7 +133,14 @@ bool track_wal_io_timing = false; int FileEncryptionEnabled = false; /* GPDB specific */ -bool gp_pause_on_restore_point_replay = false; +char *gp_pause_on_restore_point_replay = ""; + +/* + * GPDB: Have we reached a specific continuous recovery target? We set this to + * true if WAL replay has found a restore point matching the GPDB-specific GUC + * gp_pause_on_restore_point_replay and a promotion has been requested. + */ +static bool reachedContinuousRecoveryTarget = false; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -6012,6 +6019,59 @@ recoveryStopsBefore(XLogReaderState *record) return stopsHere; } +/* + * GPDB: Restore point records can act as a point of synchronization to ensure + * cluster-wide consistency during WAL replay. If a restore point is specified + * in the gp_pause_on_restore_point_replay GUC, WAL replay will be paused at + * that restore point until replay is explicitly resumed. + */ +static void +pauseRecoveryOnRestorePoint(XLogReaderState *record) +{ + uint8 info; + uint8 rmid; + + /* + * Ignore recovery target settings when not in archive recovery (meaning + * we are in crash recovery). 
+ */ + if (!ArchiveRecoveryRequested) + return; + + info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + rmid = XLogRecGetRmid(record); + + if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT) + { + xl_restore_point *recordRestorePointData; + + recordRestorePointData = (xl_restore_point *) XLogRecGetData(record); + + if (strcmp(recordRestorePointData->rp_name, gp_pause_on_restore_point_replay) == 0) + { + ereport(LOG, + (errmsg("setting recovery pause at restore point \"%s\", time %s", + recordRestorePointData->rp_name, + timestamptz_to_str(recordRestorePointData->rp_time)))); + + SetRecoveryPause(true); + recoveryPausesHere(false); + + /* + * If we've unpaused and there is a promotion request, then we've + * reached our continuous recovery target and need to immediately + * promote. We piggyback on the existing recovery target logic to + * do this. See recoveryStopsAfter(). + */ + if (CheckForStandbyTrigger()) + { + reachedContinuousRecoveryTarget = true; + recoveryTargetAction = RECOVERY_TARGET_ACTION_PROMOTE; + } + } + } +} + /* * Same as recoveryStopsBefore, but called after applying the record. * @@ -6039,15 +6099,19 @@ recoveryStopsAfter(XLogReaderState *record) /* * There can be many restore points that share the same name; we stop at * the first one. + * + * GPDB: If we've reached the continuous recovery target, we'll use the + * below logic to immediately stop recovery. */ - if (recoveryTarget == RECOVERY_TARGET_NAME && + if ((reachedContinuousRecoveryTarget || recoveryTarget == RECOVERY_TARGET_NAME) && rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT) { xl_restore_point *recordRestorePointData; recordRestorePointData = (xl_restore_point *) XLogRecGetData(record); - if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0) + if (reachedContinuousRecoveryTarget || + strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0) { recoveryStopAfter = true; recoveryStopXid = InvalidTransactionId; @@ -6565,6 +6629,16 @@ UpdateCatalogForStandbyPromotion(void) /* I am privileged */ InitializeSessionUserIdStandalone(); gp_activate_standby(); + + if (gp_segment_configuration_file && access(gp_segment_configuration_file, F_OK) == 0) + { + write_gp_segment_configuration(); + } + else + { + elog(DEBUG1, "Skipping write_gp_segment_configuration: file not found or not configured"); + } + /* close the transaction we started above */ CommitTransactionCommand(); Gp_role = old_role; @@ -7900,6 +7974,9 @@ StartupXLOG(void) WalSndWakeup(); } + if (gp_pause_on_restore_point_replay) + pauseRecoveryOnRestorePoint(xlogreader); + /* Exit loop if we reached inclusive recovery target */ if (recoveryStopsAfter(xlogreader)) { @@ -8331,6 +8408,8 @@ StartupXLOG(void) */ InRecovery = false; + SIMPLE_FAULT_INJECTOR("out_of_recovery_in_startupxlog"); + /* * Hook for plugins to do additional startup works. * @@ -9801,8 +9880,11 @@ CreateCheckPoint(int flags) * recovery we don't need to write running xact data. */ if (!shutdown && XLogStandbyInfoActive()) + { LogStandbySnapshot(); + } + SIMPLE_FAULT_INJECTOR("checkpoint_after_redo_calculated"); START_CRIT_SECTION(); @@ -11126,14 +11208,7 @@ xlog_redo(XLogReaderState *record) } else if (info == XLOG_RESTORE_POINT) { - /* - * GPDB: Restore point records can act as a point of - * synchronization to ensure cluster-wide consistency during WAL - * replay. WAL replay is paused at each restore point until it is - * explicitly resumed. 
- */ - if (gp_pause_on_restore_point_replay) - SetRecoveryPause(true); + /* nothing to do here */ } else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) { diff --git a/src/backend/catalog/.gitignore b/src/backend/catalog/.gitignore index 6c4c6d228db..3912b022a03 100644 --- a/src/backend/catalog/.gitignore +++ b/src/backend/catalog/.gitignore @@ -8,3 +8,4 @@ /pg_*_d.h /gp_*_d.h /bki-stamp +/system_views_gp.sql diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 8a58b8e5897..260bd608d50 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -56,6 +56,9 @@ OBJS += pg_extprotocol.o \ gp_matview_aux.o \ pg_directory_table.o storage_directory_table.o +GP_SYSVIEW_IN = system_views_gp.in +GP_SYSVIEW_SQL = system_views_gp.sql + CATALOG_JSON:= $(addprefix $(top_srcdir)/gpMgmt/bin/gppylib/data/, $(addsuffix .json,$(GP_MAJORVERSION))) include $(top_srcdir)/src/backend/common.mk @@ -133,7 +136,7 @@ POSTGRES_BKI_DATA += $(addprefix $(top_srcdir)/src/include/catalog/,\ $(top_builddir)/src/include/catalog/gp_version_at_initdb.dat -all: distprep generated-header-symlinks +all: distprep generated-header-symlinks $(GP_SYSVIEW_SQL) distprep: bki-stamp @@ -197,6 +200,7 @@ ifeq ($(USE_INTERNAL_FTS_FOUND), false) endif $(INSTALL_DATA) $(srcdir)/system_functions.sql '$(DESTDIR)$(datadir)/system_functions.sql' $(INSTALL_DATA) $(srcdir)/system_views.sql '$(DESTDIR)$(datadir)/system_views.sql' + $(INSTALL_DATA) $(srcdir)/$(GP_SYSVIEW_SQL) '$(DESTDIR)$(datadir)/$(GP_SYSVIEW_SQL)' $(INSTALL_DATA) $(srcdir)/information_schema.sql '$(DESTDIR)$(datadir)/information_schema.sql' $(INSTALL_DATA) $(call vpathsearch,cdb_schema.sql) '$(DESTDIR)$(datadir)/cdb_init.d/cdb_schema.sql' $(INSTALL_DATA) $(srcdir)/sql_features.txt '$(DESTDIR)$(datadir)/sql_features.txt' @@ -216,4 +220,4 @@ endif clean: maintainer-clean: clean - rm -f bki-stamp postgres.bki system_constraints.sql $(GENERATED_HEADERS) + rm -f bki-stamp postgres.bki system_constraints.sql $(GENERATED_HEADERS) $(GP_SYSVIEW_SQL) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 6b0b604ab5e..d5b7b81e8a2 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1098,6 +1098,7 @@ $$ $$ LANGUAGE SQL EXECUTE ON ALL SEGMENTS; +-- This view has an additional column than pg_stat_replication so cannot be generated using system_views_gp.in CREATE VIEW gp_stat_replication AS SELECT *, pg_catalog.gp_replication_error() AS sync_error FROM pg_catalog.gp_stat_get_master_replication() AS R @@ -1498,6 +1499,10 @@ rq.oid=rc.resqueueid AND rc.restypid = rt.restypid ORDER BY rsqname, restypid ; +-- FIXME: we have a cluster-wide view gp_stat_database_conflicts, but that is +-- only showing conflicts of every segment. Some conflict might be encountered +-- on just part of the segments. Ideally we should have a view like +-- gp_stat_database_conflicts_summary that prints the overall conflicts and types. 
CREATE VIEW pg_stat_database_conflicts AS SELECT D.oid AS datid, @@ -1801,11 +1806,6 @@ UNION ALL SELECT gp_segment_id, gp_get_suboverflowed_backends() FROM gp_dist_random('gp_id') order by 1; -CREATE OR REPLACE VIEW gp_stat_archiver AS - SELECT -1 AS gp_segment_id, * FROM pg_stat_archiver - UNION - SELECT gp_execution_segment() AS gp_segment_id, * FROM gp_dist_random('pg_stat_archiver'); - CREATE FUNCTION gp_get_session_endpoints (OUT gp_segment_id int, OUT auth_token text, OUT cursorname text, OUT sessionid int, OUT hostname varchar(64), OUT port int, OUT username text, OUT state text, diff --git a/src/backend/catalog/system_views_gp.in b/src/backend/catalog/system_views_gp.in new file mode 100644 index 00000000000..d46dde3191e --- /dev/null +++ b/src/backend/catalog/system_views_gp.in @@ -0,0 +1,48 @@ +# This file lists all the PG system views 'pg_%' that we would like to create an +# MPP-aware view 'gp_%' out of. The generated 'gp_%' view definitions will be placed +# in system_views_gp.sql, and initialized at the same time as system_views.sql. +#pg_backend_memory_contexts +pg_config +pg_cursors +pg_file_settings +pg_replication_origin_status +pg_replication_slots +pg_settings +pg_stat_activity +pg_stat_archiver +pg_stat_bgwriter +#pg_stat_database +pg_stat_database_conflicts +pg_stat_gssapi +pg_stat_operations +#pg_stat_progress_analyze +#pg_stat_progress_basebackup +#pg_stat_progress_cluster +#pg_stat_progress_copy +#pg_stat_progress_create_index +#pg_stat_progress_vacuum +pg_stat_slru +pg_stat_ssl +pg_stat_subscription +pg_stat_sys_indexes +pg_stat_sys_tables +pg_stat_user_functions +pg_stat_user_indexes +pg_stat_user_tables +#pg_stat_wal +pg_stat_wal_receiver +pg_stat_xact_all_tables +pg_stat_xact_sys_tables +pg_stat_xact_user_functions +pg_stat_xact_user_tables +pg_statio_all_indexes +pg_statio_all_sequences +pg_statio_all_tables +pg_statio_sys_indexes +pg_statio_sys_sequences +pg_statio_sys_tables +pg_statio_user_indexes +pg_statio_user_sequences +pg_statio_user_tables +#pg_stats ERROR: column "most_common_vals" has pseudo-type anyarray +pg_stats_ext diff --git a/src/backend/cdb/cdbdtxcontextinfo.c b/src/backend/cdb/cdbdtxcontextinfo.c index 1a3c1b8f295..2994821f8df 100644 --- a/src/backend/cdb/cdbdtxcontextinfo.c +++ b/src/backend/cdb/cdbdtxcontextinfo.c @@ -60,7 +60,7 @@ DtxContextInfo_CreateOnMaster(DtxContextInfo *dtxContextInfo, bool inCursor, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot have more than 2^32-2 commands in a session"))); - AssertImply(inCursor, + AssertImply(inCursor && !IS_HOT_STANDBY_QD(), dtxContextInfo->distributedXid != InvalidDistributedTransactionId && gp_command_count == MySessionState->latestCursorCommandId); diff --git a/src/backend/cdb/cdbdtxrecovery.c b/src/backend/cdb/cdbdtxrecovery.c index 186b01ff214..605ce323ddb 100644 --- a/src/backend/cdb/cdbdtxrecovery.c +++ b/src/backend/cdb/cdbdtxrecovery.c @@ -202,6 +202,11 @@ recoverInDoubtTransactions(void) for (i = 0; i < *shmNumCommittedGxacts; i++) { + /* + * No need to acquire CommittedGxidArrayLock since dtx recovery + * only happens on primary, but not hot standby where concurrent + * access to this array is possible from CreateDistributedSnapshot. + */ DistributedTransactionId gxid = shmCommittedGxidArray[i]; char gid[TMGIDSIZE]; @@ -486,7 +491,12 @@ void redoDistributedCommitRecord(DistributedTransactionId gxid) { int i; + bool is_hot_standby_qd = IS_HOT_STANDBY_QD(); + /* + * Only the startup process can be modifying shmNumCommittedGxacts + * and shmCommittedGxidArray. 
So should be OK reading the value w/o lock. + */ for (i = 0; i < *shmNumCommittedGxacts; i++) { if (gxid == shmCommittedGxidArray[i]) @@ -526,7 +536,18 @@ redoDistributedCommitRecord(DistributedTransactionId gxid) "around this issue and then report a bug"))); } + /* + * only on hot standby there might be backends that call CreateDistributedSnapshot() + * to access the committed gxid array concurrently. + */ + if (is_hot_standby_qd) + LWLockAcquire(CommittedGxidArrayLock, LW_EXCLUSIVE); + shmCommittedGxidArray[(*shmNumCommittedGxacts)++] = gxid; + + if (is_hot_standby_qd) + LWLockRelease(CommittedGxidArrayLock); + elog((Debug_print_full_dtm ? LOG : DEBUG5), "Crash recovery redo added committed distributed transaction gid = "UINT64_FORMAT, gxid); } @@ -539,7 +560,13 @@ void redoDistributedForgetCommitRecord(DistributedTransactionId gxid) { int i; - + bool is_hot_standby_qd = IS_HOT_STANDBY_QD(); + + SIMPLE_FAULT_INJECTOR("redoDistributedForgetCommitRecord"); + /* + * Only the startup process can be modifying shmNumCommittedGxacts + * and shmCommittedGxidArray. So should be OK reading the value w/o lock. + */ for (i = 0; i < *shmNumCommittedGxacts; i++) { if (gxid == shmCommittedGxidArray[i]) @@ -550,13 +577,27 @@ redoDistributedForgetCommitRecord(DistributedTransactionId gxid) gxid); /* - * there's no concurrent access to shmCommittedGxidArray during - * recovery + * only on hot standby there might be backends that call CreateDistributedSnapshot() + * to access the committed gxid array concurrently. */ + if (is_hot_standby_qd) + LWLockAcquire(CommittedGxidArrayLock, LW_EXCLUSIVE); + (*shmNumCommittedGxacts)--; if (i != *shmNumCommittedGxacts) shmCommittedGxidArray[i] = shmCommittedGxidArray[*shmNumCommittedGxacts]; + if (is_hot_standby_qd) + LWLockRelease(CommittedGxidArrayLock); + + /* on the hot standby, we rely on the forget record to advance latestCompletedGxid */ + if (is_hot_standby_qd) + { + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + if (gxid > ShmemVariableCache->latestCompletedGxid) + ShmemVariableCache->latestCompletedGxid = gxid; + LWLockRelease(ProcArrayLock); + } return; } } diff --git a/src/backend/cdb/cdbfts.c b/src/backend/cdb/cdbfts.c index 754d3054cbb..de268b6f662 100644 --- a/src/backend/cdb/cdbfts.c +++ b/src/backend/cdb/cdbfts.c @@ -84,6 +84,10 @@ FtsNotifyProber(void) int32 started; int32 done; + /* Ignore if we don't have a FTS probe process, like a standby QD in a mirrored cluster. */ + if (FtsProbePID() == 0) + return; + if (am_ftsprobe) return; diff --git a/src/backend/cdb/cdbtm.c b/src/backend/cdb/cdbtm.c index f0cd5fcb3f6..37550261149 100644 --- a/src/backend/cdb/cdbtm.c +++ b/src/backend/cdb/cdbtm.c @@ -264,6 +264,21 @@ currentDtxActivate(void) { bool signal_dtx_recovery; + /* + * A hot standby transaction does not have a valid gxid, so can skip + * most of the things in this function. We still explicitly set some + * fields that are irrelevant to hot standby for cleanness. 
+	 */
+	if (IS_HOT_STANDBY_QD())
+	{
+		/* standby QD will stay in this state until transaction completed */
+		setCurrentDtxState(DTX_STATE_ACTIVE_DISTRIBUTED);
+		MyTmGxact->sessionId = gp_session_id;
+		MyTmGxact->gxid = InvalidDistributedTransactionId;
+		MyTmGxact->includeInCkpt = false;
+		return;
+	}
+
 	if (ShmemVariableCache->GxidCount <= GXID_PRETCH_THRESHOLD &&
 		(GetDtxRecoveryEvent() & DTX_RECOVERY_EVENT_BUMP_GXID) == 0)
 	{
@@ -1644,7 +1659,7 @@ isDtxQueryDispatcher(void)
 	isSharedLocalSnapshotSlotPresent = (SharedLocalSnapshotSlot != NULL);
 
 	return (Gp_role == GP_ROLE_DISPATCH &&
-			isDtmStarted &&
+			(isDtmStarted || EnableHotStandby) &&
 			isSharedLocalSnapshotSlotPresent);
 }
 
@@ -2047,6 +2062,8 @@ sendDtxExplicitBegin(void)
 static void
 performDtxProtocolPrepare(const char *gid)
 {
+	SIMPLE_FAULT_INJECTOR("qe_start_prepared");
+
 	StartTransactionCommand();
 	elog(DTM_DEBUG5, "performDtxProtocolCommand going to call PrepareTransactionBlock for distributed transaction (id = '%s')", gid);
@@ -2126,6 +2143,7 @@ performDtxProtocolCommitOnePhase(const char *gid)
 static void
 performDtxProtocolCommitPrepared(const char *gid, bool raiseErrorIfNotFound)
 {
+	SIMPLE_FAULT_INJECTOR("qe_start_commit_prepared");
 	Assert(Gp_role == GP_ROLE_EXECUTE);
 
 	elog(DTM_DEBUG5,
@@ -2158,6 +2176,7 @@ performDtxProtocolCommitPrepared(const char *gid, bool raiseErrorIfNotFound)
 	sendWaitGxidsToQD(waitGxids);
 
 	finishDistributedTransactionContext("performDtxProtocolCommitPrepared -- Commit Prepared", false);
+	SIMPLE_FAULT_INJECTOR("finish_commit_prepared");
 }
 
 /**
diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c
index 1671b17223b..888b5b08708 100644
--- a/src/backend/cdb/cdbutil.c
+++ b/src/backend/cdb/cdbutil.c
@@ -37,6 +37,7 @@
 #include "utils/memutils.h"
 #include "catalog/gp_id.h"
 #include "catalog/indexing.h"
+#include "catalog/heap.h"
 #include "cdb/cdbhash.h"
 #include "cdb/cdbutil.h"
 #include "cdb/cdbmotion.h"
@@ -60,6 +61,9 @@
 #include "catalog/gp_indexing.h"
 #include "utils/etcd.h"
 #include "common/etcdutils.h"
+#include "storage/sinvaladt.h"
+#include "storage/bufmgr.h"
+#include "utils/syscache.h"
 
 #include "catalog/gp_indexing.h"
 
@@ -79,6 +83,7 @@
 
 MemoryContext CdbComponentsContext = NULL;
 static CdbComponentDatabases *cdb_component_dbs = NULL;
+char *gp_segment_configuration_file = NULL;
 
 #ifdef USE_INTERNAL_FTS
 
@@ -92,6 +97,7 @@ static int	CdbComponentDatabaseInfoCompare(const void *p1, const void *p2);
 
 static GpSegConfigEntry * readGpSegConfigFromCatalog(int *total_dbs);
 static GpSegConfigEntry * readGpSegConfigFromFTSFiles(int *total_dbs);
+static GpSegConfigEntry * readGpSegConfigFromExtFile(int *total_dbs);
 
 static void getAddressesForDBid(GpSegConfigEntry *c, int elevel);
 static HTAB *hostPrimaryCountHashTableInit(void);
@@ -372,7 +378,14 @@ getCdbComponentInfo(void)
 
 	HTAB	   *hostPrimaryCountHash = hostPrimaryCountHashTableInit();
 
-	if (IsTransactionState())
+	/* On a hot standby, if gp_segment_configuration_file is configured, try
+	 * to load configs from it. Since the hot standby may be created from a
+	 * basebackup, the gp_segment_configuration table is backed up from
+	 * the source cluster and cannot be modified in read-replica mode.
+	 */
+	if (EnableHotStandby && gp_segment_configuration_file)
+		configs = readGpSegConfigFromExtFile(&total_dbs);
+	else if (IsTransactionState())
 		configs = readGpSegConfigFromCatalog(&total_dbs);
 	else
 		configs = readGpSegConfigFromFTSFiles(&total_dbs);
@@ -565,7 +578,7 @@ getCdbComponentInfo(void)
 	{
 		cdbInfo = &component_databases->segment_db_info[i];
 
-		if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY)
+		if (!IS_HOT_STANDBY_QD() && cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY)
 			continue;
 
 		hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found);
@@ -577,7 +590,7 @@ getCdbComponentInfo(void)
 	{
 		cdbInfo = &component_databases->entry_db_info[i];
 
-		if (cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY)
+		if (!IS_HOT_STANDBY_QD() && cdbInfo->config->role != GP_SEGMENT_CONFIGURATION_ROLE_PRIMARY)
 			continue;
 
 		hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found);
@@ -1005,7 +1018,16 @@ cdbcomponent_getComponentInfo(int contentId)
 	/* entry db */
 	if (contentId == -1)
 	{
-		cdbInfo = &cdbs->entry_db_info[0];
+		Assert(cdbs->total_entry_dbs == 1 || cdbs->total_entry_dbs == 2);
+		/*
+		 * For a standby QD, get the last entry db which can be the first (on
+		 * a replica cluster) or the second (on a mirrored cluster) entry.
+		 */
+		if (IS_HOT_STANDBY_QD())
+			cdbInfo = &cdbs->entry_db_info[cdbs->total_entry_dbs - 1];
+		else
+			cdbInfo = &cdbs->entry_db_info[0];
+
 		return cdbInfo;
 	}
 
@@ -1022,10 +1044,10 @@ cdbcomponent_getComponentInfo(int contentId)
 	Assert(cdbs->total_segment_dbs == cdbs->total_segments * 2);
 	cdbInfo = &cdbs->segment_db_info[2 * contentId];
 
-	if (!SEGMENT_IS_ACTIVE_PRIMARY(cdbInfo))
-	{
+	/* use the other segment if it is not what the QD wants */
+	if ((IS_HOT_STANDBY_QD() && SEGMENT_IS_ACTIVE_PRIMARY(cdbInfo))
+		|| (!IS_HOT_STANDBY_QD() && !SEGMENT_IS_ACTIVE_PRIMARY(cdbInfo)))
 		cdbInfo = &cdbs->segment_db_info[2 * contentId + 1];
-	}
 
 	return cdbInfo;
 }
@@ -1124,10 +1146,21 @@ cdb_setup(void)
 	 *
 	 * Ignore background worker because bgworker_should_start_mpp() already did
 	 * the check.
+	 *
+	 * Ignore if we are the standby coordinator started in hot standby mode.
+	 * We don't expect dtx recovery to have finished, as dtx recovery is
+	 * performed at the end of startup. In hot standby, we are recovering
+	 * continuously and should allow queries much earlier. Since a hot standby
+	 * won't run dtx, it is not required to wait for recovery of the dtxes
+	 * that have been prepared but not committed (i.e. to commit them); on the
+	 * other hand, the recovery of any in-doubt transactions (i.e. not prepared)
+	 * won't bother a hot standby either, just as they can be recovered in the
+	 * background when a primary instance is running.
*/ if (!IsBackgroundWorker && Gp_role == GP_ROLE_DISPATCH && - !*shmDtmStarted) + !*shmDtmStarted && + !IS_HOT_STANDBY_QD()) { ereport(FATAL, (errcode(ERRCODE_CANNOT_CONNECT_NOW), @@ -1978,7 +2011,172 @@ gp_get_suboverflowed_backends(PG_FUNCTION_ARGS) PG_RETURN_NULL(); } -#else +void +add_segment_config_entry(GpSegConfigEntry *i) +{ + Relation rel = table_open(GpSegmentConfigRelationId, AccessExclusiveLock); + Datum values[Natts_gp_segment_configuration]; + bool nulls[Natts_gp_segment_configuration]; + HeapTuple tuple; + + MemSet(nulls, false, sizeof(nulls)); + + values[Anum_gp_segment_configuration_dbid - 1] = Int16GetDatum(i->dbid); + values[Anum_gp_segment_configuration_content - 1] = Int16GetDatum(i->segindex); + values[Anum_gp_segment_configuration_role - 1] = CharGetDatum(i->role); + values[Anum_gp_segment_configuration_preferred_role - 1] = + CharGetDatum(i->preferred_role); + values[Anum_gp_segment_configuration_mode - 1] = + CharGetDatum(i->mode); + values[Anum_gp_segment_configuration_status - 1] = + CharGetDatum(i->status); + values[Anum_gp_segment_configuration_port - 1] = + Int32GetDatum(i->port); + values[Anum_gp_segment_configuration_hostname - 1] = + CStringGetTextDatum(i->hostname); + values[Anum_gp_segment_configuration_address - 1] = + CStringGetTextDatum(i->address); + values[Anum_gp_segment_configuration_datadir - 1] = + CStringGetTextDatum(i->datadir); + values[Anum_gp_segment_configuration_warehouseid - 1] = + ObjectIdGetDatum(i->warehouseid); + + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + /* insert a new tuple */ + CatalogTupleInsert(rel, tuple); + + table_close(rel, NoLock); +} + +void +remove_segment_config_entry(int16 dbid) +{ + int numDel = 0; + ScanKeyData scankey; + SysScanDesc sscan; + HeapTuple tuple; + Relation rel; + + rel = table_open(GpSegmentConfigRelationId, RowExclusiveLock); + + ScanKeyInit(&scankey, + Anum_gp_segment_configuration_dbid, + BTEqualStrategyNumber, F_INT2EQ, + Int16GetDatum(dbid)); + sscan = systable_beginscan(rel, GpSegmentConfigDbidWarehouseIndexId, true, + NULL, 1, &scankey); + while ((tuple = systable_getnext(sscan)) != NULL) + { + Datum attr; + bool isNull; + Oid warehouseid = InvalidOid; + + attr = heap_getattr(tuple, Anum_gp_segment_configuration_warehouseid, + RelationGetDescr(rel), &isNull); + Assert(!isNull); + warehouseid = DatumGetObjectId(attr); + + if (!OidIsValid(warehouseid) || warehouseid == GetCurrentWarehouseId()) + { + CatalogTupleDelete(rel, &tuple->t_self); + numDel++; + } + } + systable_endscan(sscan); + + Assert(numDel > 0); + + table_close(rel, NoLock); +} + +static GpSegConfigEntry* +readGpSegConfigFromExtFile(int *total_dbs) +{ + FILE *fd; + int idx = 0; + int array_size = 500; + GpSegConfigEntry *configs = NULL; + GpSegConfigEntry *config = NULL; + + char hostname[MAXHOSTNAMELEN]; + char address[MAXHOSTNAMELEN]; + char datadir[1000]; + char buf[MAXHOSTNAMELEN * 2 + 32 + 2000]; + + Assert(gp_segment_configuration_file && strcmp(gp_segment_configuration_file, "") != 0); + + /* notify and wait FTS to finish a probe and update the dump file */ + + fd = AllocateFile(gp_segment_configuration_file, "r"); + + if (!fd) + elog(ERROR, "could not open gp_segment_configutation dump file:%s:%m", gp_segment_configuration_file); + + configs = palloc0(sizeof (GpSegConfigEntry) * array_size); + while (fgets(buf, sizeof(buf), fd)) + { + config = &configs[idx]; + + if (sscanf(buf, "%d %d %c %c %c %c %d %s %s %s", (int *)&config->dbid, (int *)&config->segindex, + &config->role, &config->preferred_role, 
&config->mode, &config->status, + &config->port, hostname, address, datadir) != 10) + { + FreeFile(fd); + elog(ERROR, "invalid data in gp_segment_configuration dump file: %s:%m", gp_segment_configuration_file); + } + + config->hostname = pstrdup(hostname); + config->address = pstrdup(address); + config->datadir = pstrdup(datadir); + + idx++; + /* + * Expand CdbComponentDatabaseInfo array if we've used up + * currently allocated space + */ + if (idx >= array_size) + { + array_size = array_size * 2; + configs = (GpSegConfigEntry *) + repalloc(configs, sizeof(GpSegConfigEntry) * array_size); + } + } + + FreeFile(fd); + + *total_dbs = idx; + return configs; +} + +void +write_gp_segment_configuration(void) +{ + Relation rel; + GpSegConfigEntry *configs; + int total_dbs; + SysScanDesc sscan; + HeapTuple tuple; + + rel = table_open(GpSegmentConfigRelationId, RowExclusiveLock); + sscan = systable_beginscan(rel, GpSegmentConfigDbidWarehouseIndexId, true, + NULL, 0, NULL); + while ((tuple = systable_getnext(sscan)) != NULL) + { + CatalogTupleDelete(rel, &tuple->t_self); + } + systable_endscan(sscan); + + /* insert new configs into gp_segment_configuration table */ + configs = readGpSegConfigFromExtFile(&total_dbs); + for (int i = 0; i < total_dbs; i++) { + GpSegConfigEntry config = configs[i]; + add_segment_config_entry(&config); + } + table_close(rel, RowExclusiveLock); +} + +#else bool am_ftshandler = false; diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 99f5179e756..fb29bd9fa14 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -867,6 +867,7 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, { const char *command = pQueryParms->strCommand; int command_len; + int is_hs_dispatch = IS_HOT_STANDBY_QD() ? 
1 : 0; const char *plantree = pQueryParms->serializedPlantree; int plantree_len = pQueryParms->serializedPlantreelen; const char *sddesc = pQueryParms->serializedQueryDispatchDesc; @@ -921,6 +922,7 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, sizeof(outerUserId) /* outerUserIsSuper */ + sizeof(currentUserId) + sizeof(n32) * 2 /* currentStatementStartTimestamp */ + + sizeof(is_hs_dispatch) + sizeof(command_len) + sizeof(plantree_len) + sizeof(sddesc_len) + @@ -976,6 +978,10 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms, memcpy(pos, &n32, sizeof(n32)); pos += sizeof(n32); + tmp = htonl(is_hs_dispatch); + memcpy(pos, &tmp, sizeof(is_hs_dispatch)); + pos += sizeof(is_hs_dispatch); + tmp = htonl(command_len); memcpy(pos, &tmp, sizeof(command_len)); pos += sizeof(command_len); diff --git a/src/backend/cdb/dispatcher/cdbgang.c b/src/backend/cdb/dispatcher/cdbgang.c index 780ddef0f42..87ce88504b0 100644 --- a/src/backend/cdb/dispatcher/cdbgang.c +++ b/src/backend/cdb/dispatcher/cdbgang.c @@ -698,8 +698,7 @@ getCdbProcessesForQD(int isPrimary) qdinfo = cdbcomponent_getComponentInfo(MASTER_CONTENT_ID); - Assert(qdinfo->config->segindex == -1); - Assert(SEGMENT_IS_ACTIVE_PRIMARY(qdinfo)); + Assert((qdinfo->config->segindex == -1 && SEGMENT_IS_ACTIVE_PRIMARY(qdinfo)) || IS_HOT_STANDBY_QD()); Assert(qdinfo->config->hostip != NULL); proc = makeNode(CdbProcess); diff --git a/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c b/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c new file mode 100644 index 00000000000..6e07aebcc96 --- /dev/null +++ b/src/backend/cdb/dispatcher/test/cdbdisp_query_test.c @@ -0,0 +1,341 @@ +#include +#include +#include +#include "cmockery.h" +#include "postgres.h" + +#include "storage/ipc.h" +#include "storage/proc.h" + +#include "../cdbdisp_query.c" + + +#undef PG_RE_THROW +#define PG_RE_THROW() siglongjmp(*PG_exception_stack, 1) + + +int __wrap_errmsg(const char *fmt,...); +int __wrap_errcode(int sqlerrcode); +bool __wrap_errstart(int elevel, const char *filename, int lineno, + const char *funcname, const char *domain); +void __wrap_errfinish(int dummy __attribute__((unused)),...); +Gang *__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType); +int __wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn); +int __wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize); +char *__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out); +char *__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller); +void __wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid); +void __wrap_AcceptInvalidationMessages(void); +static void terminate_process(); + + +int +__wrap_errmsg(const char *fmt,...) +{ + check_expected(fmt); + optional_assignment(fmt); + return (int) mock(); +} + + +int +__wrap_errcode(int sqlerrcode) +{ + check_expected(sqlerrcode); + return (int) mock(); +} + + +bool +__wrap_errstart(int elevel, const char *filename, int lineno, + const char *funcname, const char *domain) +{ + if (elevel < LOG) + return false; + + check_expected(elevel); + check_expected(filename); + check_expected(lineno); + check_expected(funcname); + check_expected(domain); + optional_assignment(filename); + optional_assignment(funcname); + optional_assignment(domain); + return (bool) mock(); +} + + +void +__wrap_errfinish(int dummy __attribute__((unused)),...) 
+{ + PG_RE_THROW(); +} + + +static void +expect_ereport(int expect_elevel) +{ + expect_any(__wrap_errmsg, fmt); + will_be_called(__wrap_errmsg); + + expect_any(__wrap_errcode, sqlerrcode); + will_be_called(__wrap_errcode); + + expect_value(__wrap_errstart, elevel, expect_elevel); + expect_any(__wrap_errstart, filename); + expect_any(__wrap_errstart, lineno); + expect_any(__wrap_errstart, funcname); + expect_any(__wrap_errstart, domain); + if (expect_elevel < ERROR) + { + will_return(__wrap_errstart, false); + } + else + { + will_return(__wrap_errstart, true); + } +} + + +Gang * +__wrap_cdbgang_createGang_async(List *segments, SegmentType segmentType) +{ + MemoryContext oldContext = MemoryContextSwitchTo(DispatcherContext); + Gang *gang = buildGangDefinition(segments, segmentType); + + MemoryContextSwitchTo(oldContext); + + PGconn *conn = (PGconn *) malloc(sizeof(PGconn)); + + MemSet(conn, 0, sizeof(PGconn)); + initPQExpBuffer(&conn->errorMessage); + initPQExpBuffer(&conn->workBuffer); + gang->db_descriptors[0]->conn = conn; + + return gang; +} + + +int +__wrap_pqPutMsgStart(char msg_type, bool force_len, PGconn *conn) +{ + if (conn->outBuffer_shared) + fail_msg("Mustn't send something else during dispatch!"); + check_expected(msg_type); + check_expected(force_len); + check_expected(conn); + optional_assignment(conn); + return (int) mock(); +} + + +int +__wrap_PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) +{ + return (int) mock(); +} + + +char * +__wrap_serializeNode(Node *node, int *size, int *uncompressed_size_out) +{ + const int alloc_size = 1024; + + if (size != NULL) + *size = alloc_size; + if (uncompressed_size_out != NULL) + *uncompressed_size_out = alloc_size; + + return (char *) palloc(alloc_size); +} + + +char * +__wrap_qdSerializeDtxContextInfo(int *size, bool wantSnapshot, bool inCursor, int txnOptions, char *debugCaller) +{ + const int alloc_size = 1024; + + assert_int_not_equal(size, NULL); + *size = alloc_size; + + return (char *) palloc(alloc_size); +} + + +void +__wrap_VirtualXactLockTableInsert(VirtualTransactionId vxid) +{ + mock(); +} + +void +__wrap_AcceptInvalidationMessages(void) +{ + mock(); +} + + +static void +terminate_process() +{ + die(SIGTERM); +} + +/* + * Test query may be interrupted during plan dispatching + */ +static void +test__CdbDispatchPlan_may_be_interrupted(void **state) +{ + PlannedStmt *plannedstmt = (PlannedStmt *) palloc(sizeof(PlannedStmt)); + + /* slice table is needed to allocate gang */ + plannedstmt->slices = palloc0(sizeof(PlanSlice)); + plannedstmt->numSlices = 1; + PlanSlice *slice = &plannedstmt->slices[0]; + + slice->sliceIndex = 1; + slice->gangType = GANGTYPE_PRIMARY_READER; + slice->numsegments = 1; + slice->parentIndex = -1; + slice->segindex = 0; + + QueryDesc *queryDesc = (QueryDesc *) palloc(sizeof(QueryDesc)); + + queryDesc->plannedstmt = plannedstmt; + /* ddesc->secContext is filled in cdbdisp_buildPlanQueryParms() */ + queryDesc->ddesc = (QueryDispatchDesc *) palloc(sizeof(QueryDispatchDesc)); + /* source text is required for buildGpQueryString() */ + queryDesc->sourceText = "select a from t1;"; + + queryDesc->estate = CreateExecutorState(); + + /* will be called multiple times in e.g. 
FtsNotifyProber/getCdbComponentInfo */ + will_return_count(RecoveryInProgress, false, -1); + + /* cdbcomponent_getCdbComponents() mocks */ + will_be_called(FtsNotifyProber); + will_return(getFtsVersion, 1); + will_return(GetGpExpandVersion, 1); + + /* StartTransactionCommand() mocks */ + will_be_called(__wrap_VirtualXactLockTableInsert); + will_be_called(__wrap_AcceptInvalidationMessages); + will_be_called(initialize_wal_bytes_written); + + /* + * cdbdisp_dispatchToGang() + * + * start sending MPP query to QE inside PQsendGpQuery_shared() replace + * connection buffer with the shared one + */ + expect_any(PQsendQueryStart, conn); + will_return(PQsendQueryStart, true); + + /* first try to flush MPP query inside PQsendGpQuery_shared() */ + expect_any(pqFlushNonBlocking, conn); + will_return(pqFlushNonBlocking, 1); + + /* + * cdbdisp_waitDispatchFinish() + * + * query will be interrupted before poll() + */ + expect_any_count(ResetWaitEventSet, pset, 2); + expect_any_count(ResetWaitEventSet, context, 2); + expect_any_count(ResetWaitEventSet, nevents, 2); + will_be_called_count(ResetWaitEventSet, 2); + + expect_any(pqFlushNonBlocking, conn); + will_return_with_sideeffect(pqFlushNonBlocking, 1, &terminate_process, NULL); + + expect_any(SetLatch, latch); + will_be_called(SetLatch); + + expect_any(AddWaitEventToSet, set); + expect_any(AddWaitEventToSet, events); + expect_any(AddWaitEventToSet, fd); + expect_any(AddWaitEventToSet, latch); + expect_any(AddWaitEventToSet, user_data); + will_be_called(AddWaitEventToSet); + + will_return(IsLogicalLauncher, false); + + /* process was terminated by administrative command */ + expect_ereport(FATAL); + + /* QD will trying to cancel queries on QEs */ + will_return(__wrap_PQcancel, true); + + /* during close and free connection */ + expect_any_count(pqClearAsyncResult, conn, 2); + will_be_called_count(pqClearAsyncResult, 2); + + /* + * BUT! pqPutMsgStart mustn't be called + * + * we can't send termination message (X) until shared message isn't sent + * out the buffer completely + */ + + /* + * dirty hack. 
cluster topology needed to allocate gangs is loaded from + * gpsegconfig_dump outside of transaction + */ + cdbcomponent_getCdbComponents(); + + StartTransactionCommand(); + + PG_TRY(); + { + queryDesc->estate->es_sliceTable = InitSliceTable(queryDesc->estate, plannedstmt); + + CdbDispatchPlan(queryDesc, queryDesc->estate->es_param_exec_vals, + false, false); + fail(); + } + PG_CATCH(); + { + /* + * SIGTERM handling emulation gpdb bail out from CheckDispatchResult + * without flushing unsent messages in case of process exit in + * progress AtAbort_DispatcherState will be called during transaction + * abort + */ + proc_exit_inprogress = true; + + AtAbort_DispatcherState(); + } + PG_END_TRY(); +} + +int +main(int argc, char *argv[]) +{ + cmockery_parse_arguments(argc, argv); + + const UnitTest tests[] = + { + unit_test(test__CdbDispatchPlan_may_be_interrupted) + }; + + Gp_role = GP_ROLE_DISPATCH; + /* to start transaction */ + PGPROC proc; + + MyBackendId = 7; + proc.backendId = MyBackendId; + MyProc = &proc; + /* to build cdb components info */ + GpIdentity.dbid = 1; + GpIdentity.segindex = -1; + + MemoryContextInit(); + + /* to avoid mocking cdbtm.c functions */ + MyTmGxactLocal = (TMGXACTLOCAL *) MemoryContextAllocZero(TopMemoryContext, sizeof(TMGXACTLOCAL)); + + SetSessionUserId(1000, true); + + return run_tests(tests); +} diff --git a/src/backend/fts/fts.c b/src/backend/fts/fts.c index 719e8fbca1c..c7c1711e97f 100644 --- a/src/backend/fts/fts.c +++ b/src/backend/fts/fts.c @@ -102,7 +102,7 @@ sigIntHandler(SIGNAL_ARGS) pid_t FtsProbePID(void) { - return *shmFtsProbePID; + return shmFtsProbePID ? *shmFtsProbePID : 0; } bool diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 1a835983222..68524222d71 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -371,6 +371,9 @@ standby_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * XLOG_XACT_INVALIDATIONS. So we don't need to do anything here. */ break; + case XLOG_LATESTCOMPLETED_GXID: + /* FIXME: need to decode this part? */ + break; default: elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info); } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 73a53822b3d..e2953686b8e 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -1712,6 +1712,7 @@ OpenTemporaryFile(bool interXact, const char *filePrefix) if (!interXact) RegisterTemporaryFile(file); + SIMPLE_FAULT_INJECTOR("after_open_temp_file"); return file; } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 3154caba1bd..57c03cce7d9 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2530,8 +2530,10 @@ getDtxCheckPointInfo(char **result, int *result_size) gxid_array = &gxact_checkpoint->committedGxidArray[0]; actual = 0; + LWLockAcquire(CommittedGxidArrayLock, LW_SHARED); for (; actual < *shmNumCommittedGxacts; actual++) gxid_array[actual] = shmCommittedGxidArray[actual]; + LWLockRelease(CommittedGxidArrayLock); SIMPLE_FAULT_INJECTOR("checkpoint_dtx_info"); @@ -2609,7 +2611,8 @@ CreateDistributedSnapshot(DistributedSnapshot *ds) ProcArrayStruct *arrayP = procArray; Assert(LWLockHeldByMe(ProcArrayLock)); - if (*shmNumCommittedGxacts != 0) + /* Hot standby accepts query while constantly replaying dtx, so this ERROR doesn't apply. 
*/ + if (!IS_HOT_STANDBY_QD() && *shmNumCommittedGxacts != 0) elog(ERROR, "Create distributed snapshot before DTM recovery finish"); xmin = xmax = ShmemVariableCache->latestCompletedGxid + 1; @@ -2623,9 +2626,45 @@ CreateDistributedSnapshot(DistributedSnapshot *ds) Assert(ds->inProgressXidArray != NULL); + /* + * For a hot standby QD, check shmCommittedGxidArray to build the knowledge. + * Need to acquire shared lock to access the committed gxid array as the + * startup process might modify it. + */ + if (IS_HOT_STANDBY_QD()) + { + LWLockAcquire(CommittedGxidArrayLock, LW_SHARED); + for (i = 0; i < *shmNumCommittedGxacts; i++) + { + DistributedTransactionId gxid; + + gxid = shmCommittedGxidArray[i]; + + if (gxid == InvalidDistributedTransactionId || gxid >= xmax) + continue; + + if (gxid < xmin) + xmin = gxid; + + ds->inProgressXidArray[count++] = gxid; + } + LWLockRelease(CommittedGxidArrayLock); + } + /* * Gather up current in-progress global transactions for the distributed * snapshot. + * + * Note: The inProgressXidArray built below may contain transactions that + * have been prepared on some/all segments, and for which the QD hasn't + * begun the COMMIT phase (by writing a XLOG_XACT_DISTRIBUTED_COMMIT record). + * The gxids of these transactions don't necessarily have to be placed into + * inProgressXidArray, for correctness. This is because for visibility + * checks on the QEs, a state of DISTRIBUTEDSNAPSHOT_COMMITTED_UNKNOWN will + * be encountered for such txs, prompting a local check. The local check will + * always find these txs in progress (due to the dummy PGXACTs being + * recorded for prepared txs). So, hypothetically we could exclude these txs + * here, but we don't currently track them on the QD, so we can't. */ for (i = 0; i < arrayP->numProcs; i++) { diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 687ce03767d..13dc551ca54 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -21,6 +21,7 @@ #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "cdb/cdbvars.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" @@ -29,6 +30,7 @@ #include "storage/procarray.h" #include "storage/sinvaladt.h" #include "storage/standby.h" +#include "utils/faultinjector.h" #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -848,6 +850,8 @@ SendRecoveryConflictWithBufferPin(ProcSignalReason reason) * SIGUSR1 handling in each backend decide their own fate. */ CancelDBBackends(InvalidOid, reason, false); + + SIMPLE_FAULT_INJECTOR("recovery_conflict_bufferpin_signal_sent"); } /* @@ -1148,6 +1152,23 @@ standby_redo(XLogReaderState *record) xlrec->dbId, xlrec->tsId); } + else if (info == XLOG_LATESTCOMPLETED_GXID) + { + /* + * This record is only logged by coordinator. But the segment in + * some situation might see it too (e.g. gpexpand), but segment + * doesn't need to update latestCompletedGxid. 
+ */ + if (IS_QUERY_DISPATCHER()) + { + DistributedTransactionId gxid; + + gxid = *((DistributedTransactionId *) XLogRecGetData(record)); + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ShmemVariableCache->latestCompletedGxid = gxid; + LWLockRelease(ProcArrayLock); + } + } else elog(PANIC, "standby_redo: unknown op code %u", info); } @@ -1265,6 +1286,21 @@ LogStandbySnapshot(void) /* GetRunningTransactionData() acquired XidGenLock, we must release it */ LWLockRelease(XidGenLock); + if (IS_QUERY_DISPATCHER()) + { + /* + * GPDB: write latestCompletedGxid too, because the standby needs this + * value for creating distributed snapshot. The standby cannot rely on + * the nextGxid value to set latestCompletedGxid during restart (which + * the primary does) because nextGxid was bumped in the checkpoint. + */ + LWLockAcquire(ProcArrayLock, LW_SHARED); + DistributedTransactionId lcgxid = ShmemVariableCache->latestCompletedGxid; + LWLockRelease(ProcArrayLock); + XLogBeginInsert(); + XLogRegisterData((char *) (&lcgxid), sizeof(lcgxid)); + recptr = XLogInsert(RM_STANDBY_ID, XLOG_LATESTCOMPLETED_GXID); + } return recptr; } diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index c8f283198ce..c3583b146d7 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -75,3 +75,4 @@ LoginFailedControlLock 65 LoginFailedSharedMemoryLock 66 GPIVMResLock 67 DirectoryTableLock 68 +CommittedGxidArrayLock 69 diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index a174e981b1f..37d917a1f3e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -354,17 +354,9 @@ InitProcess(void) * WAL sender, etc are marked as GP_ROLE_UTILITY to prevent unwanted * GP_ROLE_DISPATCH MyProc settings such as mppSessionId being valid and * mppIsWriter set to true. - * - * RecoveryInProgress() to see if we are in hot standby, because - * HotStandbyActive() is still true after promotion. 
*/ - if (am_walsender || am_ftshandler || am_faulthandler || - (GpIdentity.segindex == -1 && RecoveryInProgress())) - { + if (am_walsender || am_ftshandler || am_faulthandler) Gp_role = GP_ROLE_UTILITY; - if (GpIdentity.segindex == -1 && RecoveryInProgress()) - elog(WARNING, "Force to run in utility mode in hot standby"); - } /* * ProcGlobal should be set up already (if we are a backend, we inherit diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f29c9c2e606..62ded58aafb 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1529,6 +1529,8 @@ exec_mpp_dtx_protocol_command(DtxProtocolCommand dtxProtocolCommand, qc.commandTag = GetCommandTagEnum(loggingStr); qc.nprocessed = 1; + SIMPLE_FAULT_INJECTOR("exec_dtx_protocol_start"); + if (log_statement == LOGSTMT_ALL) elog(LOG,"DTM protocol command '%s' for gid = %s", loggingStr, gid); @@ -5714,6 +5716,7 @@ PostgresMain(int argc, char *argv[], const char *serializedQueryDispatchDesc = NULL; const char *resgroupInfoBuf = NULL; + int is_hs_dispatch; int query_string_len = 0; int serializedDtxContextInfolen = 0; int serializedPlantreelen = 0; @@ -5750,6 +5753,20 @@ PostgresMain(int argc, char *argv[], cuid = pq_getmsgint(&input_message, 4); statementStart = pq_getmsgint64(&input_message); + + /* check if the message is from standby QD and is expected */ + is_hs_dispatch = pq_getmsgint(&input_message, 4); + if (is_hs_dispatch == 0 && IS_HOT_STANDBY_QE()) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("mirror segments can only process MPP protocol messages from standby QD"), + errhint("Exit the current session and re-connect."))); + else if (is_hs_dispatch != 0 && !IS_HOT_STANDBY_QE()) + ereport(ERROR, + (errcode(ERRCODE_PROTOCOL_VIOLATION), + errmsg("primary segments can only process MPP protocol messages from primary QD"), + errhint("Exit the current session and re-connect."))); + query_string_len = pq_getmsgint(&input_message, 4); serializedPlantreelen = pq_getmsgint(&input_message, 4); serializedQueryDispatchDesclen = pq_getmsgint(&input_message, 4); diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 728d12c604a..532690f1d51 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -617,6 +617,8 @@ PortalStart(Portal portal, ParamListInfo params, needDistributedSnapshot = false; } + SIMPLE_FAULT_INJECTOR("select_before_qd_create_snapshot"); + /* Must set snapshot before starting executor. 
*/ if (snapshot) PushActiveSnapshot(snapshot); @@ -626,6 +628,8 @@ PortalStart(Portal portal, ParamListInfo params, /* reset value */ needDistributedSnapshot = true; + SIMPLE_FAULT_INJECTOR("select_after_qd_create_snapshot"); + /* * We could remember the snapshot in portal->portalSnapshot, * but presently there seems no need to, as this code path diff --git a/src/backend/utils/gp/segadmin.c b/src/backend/utils/gp/segadmin.c index e8b9b309cb6..8d06ae50b48 100644 --- a/src/backend/utils/gp/segadmin.c +++ b/src/backend/utils/gp/segadmin.c @@ -181,87 +181,12 @@ static void remove_segment_config(int16 dbid) { #ifdef USE_INTERNAL_FTS - int numDel = 0; - ScanKeyData scankey; - SysScanDesc sscan; - HeapTuple tuple; - Relation rel; - - rel = table_open(GpSegmentConfigRelationId, RowExclusiveLock); - - ScanKeyInit(&scankey, - Anum_gp_segment_configuration_dbid, - BTEqualStrategyNumber, F_INT2EQ, - Int16GetDatum(dbid)); - sscan = systable_beginscan(rel, GpSegmentConfigDbidWarehouseIndexId, true, - NULL, 1, &scankey); - while ((tuple = systable_getnext(sscan)) != NULL) - { - Datum attr; - bool isNull; - Oid warehouseid = InvalidOid; - - attr = heap_getattr(tuple, Anum_gp_segment_configuration_warehouseid, - RelationGetDescr(rel), &isNull); - Assert(!isNull); - warehouseid = DatumGetObjectId(attr); - - if (!OidIsValid(warehouseid) || warehouseid == GetCurrentWarehouseId()) - { - CatalogTupleDelete(rel, &tuple->t_self); - numDel++; - } - } - systable_endscan(sscan); - - Assert(numDel > 0); - - table_close(rel, NoLock); + remove_segment_config_entry(dbid); #else delSegment(dbid); #endif } -#ifdef USE_INTERNAL_FTS -static void -add_segment_config_entry(GpSegConfigEntry *i) -{ - Relation rel = table_open(GpSegmentConfigRelationId, AccessExclusiveLock); - Datum values[Natts_gp_segment_configuration]; - bool nulls[Natts_gp_segment_configuration]; - HeapTuple tuple; - - MemSet(nulls, false, sizeof(nulls)); - - values[Anum_gp_segment_configuration_dbid - 1] = Int16GetDatum(i->dbid); - values[Anum_gp_segment_configuration_content - 1] = Int16GetDatum(i->segindex); - values[Anum_gp_segment_configuration_role - 1] = CharGetDatum(i->role); - values[Anum_gp_segment_configuration_preferred_role - 1] = - CharGetDatum(i->preferred_role); - values[Anum_gp_segment_configuration_mode - 1] = - CharGetDatum(i->mode); - values[Anum_gp_segment_configuration_status - 1] = - CharGetDatum(i->status); - values[Anum_gp_segment_configuration_port - 1] = - Int32GetDatum(i->port); - values[Anum_gp_segment_configuration_hostname - 1] = - CStringGetTextDatum(i->hostname); - values[Anum_gp_segment_configuration_address - 1] = - CStringGetTextDatum(i->address); - values[Anum_gp_segment_configuration_datadir - 1] = - CStringGetTextDatum(i->datadir); - values[Anum_gp_segment_configuration_warehouseid - 1] = - ObjectIdGetDatum(i->warehouseid); - - tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); - - /* insert a new tuple */ - CatalogTupleInsert(rel, tuple); - - table_close(rel, NoLock); -} -#endif - static void add_segment(GpSegConfigEntry *new_segment_information) { diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index bd6ae4300da..247935c5945 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3081,17 +3081,6 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL, NULL }, - - { - {"gp_pause_on_restore_point_replay", PGC_SIGHUP, DEVELOPER_OPTIONS, - gettext_noop("Pause recovery when a restore point is replayed."), - NULL, - GUC_NO_SHOW_ALL | 
GUC_NOT_IN_SAMPLE - }, - &gp_pause_on_restore_point_replay, - false, - NULL, NULL, NULL - }, { {"gp_autostats_allow_nonowner", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Allow automatic stats collection on tables even for users who are not the owner of the relation."), @@ -5029,6 +5018,27 @@ struct config_string ConfigureNamesString_gp[] = "udpifc", check_gp_interconnect_type, assign_gp_interconnect_type, show_gp_interconnect_type }, + { + {"gp_pause_on_restore_point_replay", PGC_SUSET, DEVELOPER_OPTIONS, + gettext_noop("Specifies the restore point to pause replay on."), + gettext_noop("Unlike recovery_target_name, this can be used to continuously set/reset " + "how much a standby should replay up to."), + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_pause_on_restore_point_replay, + "", + NULL, NULL, NULL + }, + { + {"gp_segment_configuration_file", PGC_SUSET, DEVELOPER_OPTIONS, + gettext_noop("Specifies the recovery cluster gp_segment_configuration file"), + gettext_noop(""), + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_segment_configuration_file, + "", + NULL, NULL, NULL + }, /* End-of-list marker */ { diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 42c22a0690a..0ae8ddf27f1 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -174,6 +174,7 @@ static char *external_fts_files; #endif static char *system_functions_file; static char *system_views_file; +static char *system_views_gp_file; static bool success = false; static bool made_new_pgdata = false; static bool found_existing_pgdata = false; @@ -2831,6 +2832,7 @@ setup_data_file_paths(void) set_input(&system_constraints_file, "system_constraints.sql"); set_input(&system_functions_file, "system_functions.sql"); set_input(&system_views_file, "system_views.sql"); + set_input(&system_views_gp_file, "system_views_gp.sql"); set_input(&cdb_init_d_dir, "cdb_init.d"); @@ -2864,6 +2866,7 @@ setup_data_file_paths(void) #endif check_input(system_functions_file); check_input(system_views_file); + check_input(system_views_gp_file); } @@ -3231,6 +3234,7 @@ initialize_data_directory(void) */ setup_run_file(cmdfd, system_views_file); + setup_run_file(cmdfd, system_views_gp_file); setup_description(cmdfd); diff --git a/src/include/access/transam.h b/src/include/access/transam.h index cec3e5f4cb7..687799bec9f 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -301,7 +301,7 @@ extern int xid_stop_limit; extern int xid_warn_limit; /* GPDB-specific */ -extern bool gp_pause_on_restore_point_replay; +extern char *gp_pause_on_restore_point_replay; /* hook for plugins to assign new relfilenode */ typedef Oid (*NewSegRelfilenode_assign_hook_type)(void); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 2dfad411b7a..e8a73ceb201 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -11,6 +11,8 @@ #ifndef XLOG_H #define XLOG_H +#include "postgres.h" /* for Datum */ + #include "access/rmgr.h" #include "access/xlogdefs.h" #include "access/xloginsert.h" diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 86910a0dada..026192b3674 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302502091 +#define CATALOG_VERSION_NO 302506101 #endif diff --git a/src/include/cdb/cdbtm.h b/src/include/cdb/cdbtm.h index 951b9013c00..2bf259a8744 100644 --- a/src/include/cdb/cdbtm.h +++ b/src/include/cdb/cdbtm.h @@ -35,8 +35,12 @@ typedef enum 
DTX_STATE_NONE = 0, /** - * The distributed transaction is active and requires distributed coordination - * (because it is explicit or an implicit writer transaction) + * The distributed transaction is active. + * For a primary, this state means the transaction requires distributed + * coordination (because it is explicit or an implicit writer transaction), + * and it will switch to other dtx states in different phases. + * For a hot standby, there is no coordination necessary so transaction + * will stay in this state until the end of the commit. */ DTX_STATE_ACTIVE_DISTRIBUTED, @@ -232,6 +236,7 @@ typedef struct TMGXACTLOCAL { /* * Memory only fields. + * If we are in hot standby, only 'state' is relevant. */ DtxState state; diff --git a/src/include/cdb/cdbutil.h b/src/include/cdb/cdbutil.h index 22c3cc782d8..d3711ca3ff8 100644 --- a/src/include/cdb/cdbutil.h +++ b/src/include/cdb/cdbutil.h @@ -37,6 +37,8 @@ extern char *gp_etcd_cluster_id; extern char *gp_etcd_namespace; extern char *gp_etcd_endpoints; +extern char *gp_segment_configuration_file; + typedef struct GpSegConfigEntryForUDF { GpSegConfigEntry * config_entry; @@ -132,6 +134,9 @@ extern char *getDnsAddress(char *name, int port, int elevel); #ifdef USE_INTERNAL_FTS extern void writeGpSegConfigToFTSFiles(void); +extern void add_segment_config_entry(GpSegConfigEntry *i); +extern void remove_segment_config_entry(int16 dbid); +extern void write_gp_segment_configuration(void); #else GpSegConfigEntry * readGpSegConfig(char * buff, int *total_dbs); diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 90af5177ce0..2393384ec3a 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -19,6 +19,7 @@ #ifndef CDBVARS_H #define CDBVARS_H +#include "access/xlog.h" /*RecoveryInProgress*/ #include "access/xlogdefs.h" /*XLogRecPtr*/ #include "cdb/cdbutil.h" /* MASTER_CONTENT_ID */ #ifdef USE_INTERNAL_FTS @@ -757,8 +758,10 @@ extern GpId GpIdentity; #define UNINITIALIZED_GP_IDENTITY_VALUE (-10000) #define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID) +#define IS_HOT_STANDBY_QD() (EnableHotStandby && IS_QUERY_DISPATCHER() && RecoveryInProgress()) #define IS_QUERY_EXECUTOR_BACKEND() (Gp_role == GP_ROLE_EXECUTE && gp_session_id > 0) +#define IS_HOT_STANDBY_QE() (EnableHotStandby && IS_QUERY_EXECUTOR_BACKEND() && RecoveryInProgress()) /* Stores the listener port that this process uses to listen for incoming * Interconnect connections from other Motion nodes. 
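A note on the two macros added to cdbvars.h above: a hot-standby QD still satisfies IS_QUERY_DISPATCHER() (its segindex is MASTER_CONTENT_ID), and a hot-standby QE still satisfies IS_QUERY_EXECUTOR_BACKEND(), so any caller that cares about the distinction has to test the hot-standby variants first. Below is a minimal sketch of such a caller, assuming the usual backend headers; the helper name classify_mpp_backend is illustrative only and is not part of this patch.

#include "postgres.h"
#include "access/xlog.h"	/* EnableHotStandby, RecoveryInProgress() */
#include "cdb/cdbvars.h"	/* IS_HOT_STANDBY_QD(), IS_HOT_STANDBY_QE() */

/* Illustrative helper (not in the patch): name the role this backend plays. */
static const char *
classify_mpp_backend(void)
{
	/* Hot-standby checks must come first; the plain macros also match. */
	if (IS_HOT_STANDBY_QD())
		return "standby QD: read-only dispatcher on the mirrored coordinator";
	if (IS_QUERY_DISPATCHER())
		return "primary QD";
	if (IS_HOT_STANDBY_QE())
		return "QE on a mirror segment, serving a standby QD";
	if (IS_QUERY_EXECUTOR_BACKEND())
		return "primary QE";
	return "utility or non-MPP backend";
}

The same ordering concern is what the is_hs_dispatch check in postgres.c enforces on the wire: a QE rejects a dispatch whose flag does not match its own IS_HOT_STANDBY_QE() state, so a gang created before a promotion cannot silently keep serving the wrong coordinator (see the failover test output later in this patch).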
diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h index d99e6f40c6d..f007fe25245 100644 --- a/src/include/storage/standbydefs.h +++ b/src/include/storage/standbydefs.h @@ -34,6 +34,7 @@ extern void standby_desc_invalidations(StringInfo buf, #define XLOG_STANDBY_LOCK 0x00 #define XLOG_RUNNING_XACTS 0x10 #define XLOG_INVALIDATIONS 0x20 +#define XLOG_LATESTCOMPLETED_GXID 0xF0 typedef struct xl_standby_locks { diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 959d2a89a73..abf189263bb 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -241,6 +241,7 @@ "gp_max_slices", "gp_motion_cost_per_row", "gp_pause_on_restore_point_replay", + "gp_segment_configuration_file", "gp_predicate_pushdown_sample_rows", "gp_print_create_gang_time", "gp_qd_hostname", diff --git a/src/test/isolation2/Makefile b/src/test/isolation2/Makefile index 759b2855513..bc1e0f66be0 100644 --- a/src/test/isolation2/Makefile +++ b/src/test/isolation2/Makefile @@ -90,3 +90,6 @@ installcheck-cbdb-parallel: install export PGOPTIONS='-c optimizer=off -c enable_parallel=true'; \ $(pg_isolation2_regress_installcheck) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_isolation2 --schedule=$(srcdir)/isolation2_schedule \ ) + +installcheck-hot-standby: install + $(pg_isolation2_regress_installcheck) $(EXTRA_REGRESS_OPTS) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_isolation2 --schedule=$(srcdir)/hot_standby_schedule --dbname=isolation2-hot-standby diff --git a/src/test/isolation2/expected/hot_standby/basic.out b/src/test/isolation2/expected/hot_standby/basic.out new file mode 100644 index 00000000000..5318a35d7d7 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/basic.out @@ -0,0 +1,242 @@ +-- Tests for basic query dispatch on a hot standy. + +-- hot standby must show on and the sync mode is remote_apply for the tests to make sense +-1S: show hot_standby; + hot_standby +------------- + on +(1 row) +-1S: show synchronous_commit; + synchronous_commit +-------------------- + remote_apply +(1 row) + +-- will be checking if QD/QE info looks good +-1S: select id, type, content, port from gp_backend_info(); + id | type | content | port +----+------+---------+------ + -1 | Q | -1 | 7001 +(1 row) + +---------------------------------------------------------------- +-- Test: basic query dispatch +---------------------------------------------------------------- +create table hs_t1(a int); +CREATE +create table hs_t2(a int); +CREATE + +-- standby should see the results for 2pc immediately. +insert into hs_t1 select * from generate_series(1,10); +INSERT 10 +-1S: select * from hs_t1; + a +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) +-- standby won't see results for the last 1pc immediately because the standby QD +-- isn't aware of of it so its distributed snapshot doesn't include the 1pc, but +-- as long as another 2pc comes it will be able to see the previous 1pc. Wee +-- tolerate this case in the mirrored cluster setup. +insert into hs_t2 values(1); +INSERT 1 +-1S: select * from hs_t2; + a +--- +(0 rows) +-- any following 2pc will make the 1pc visible +create temp table tt(a int); +CREATE +-1S: select * from hs_t2; + a +--- + 1 +(1 row) + +-- we have three QEs launched on the mirror segments. +-- note that the first QE on a segment is still a "writer" because we +-- need it to manage locks, same as read-only queries on a primary QD. 
+-1S: select id, type, content, port from gp_backend_info(); + id | type | content | port +----+------+---------+------ + -1 | Q | -1 | 7001 + 0 | w | 0 | 7005 + 1 | w | 1 | 7006 + 2 | w | 2 | 7007 +(4 rows) + +-- should have parallel readers launched +-1S: select * from hs_t1 join (select * from hs_t2) hs_t2 on (hs_t1 = hs_t2); + a | a +---+--- + 1 | 1 +(1 row) +-1S: select id, type, content, port from gp_backend_info(); + id | type | content | port +----+------+---------+------ + -1 | Q | -1 | 7001 + 0 | w | 0 | 7005 + 1 | w | 1 | 7006 + 2 | w | 2 | 7007 + 3 | r | 0 | 7005 + 4 | r | 1 | 7006 + 5 | r | 2 | 7007 +(7 rows) + +-- now a singleton reader added too +-1S: select * from hs_t1 join (select oid::int from pg_class) hs_t2 on (hs_t1 = hs_t2); + a | oid +---+----- +(0 rows) +-1S: select id, type, content, port from gp_backend_info(); + id | type | content | port +----+------+---------+------ + -1 | Q | -1 | 7001 + 0 | w | 0 | 7005 + 1 | w | 1 | 7006 + 2 | w | 2 | 7007 + 3 | r | 0 | 7005 + 4 | r | 1 | 7006 + 5 | r | 2 | 7007 + 6 | R | -1 | 7001 +(8 rows) + +-- un-committed result should not be seen by the standby +begin; +BEGIN +insert into hs_t1 select * from generate_series(11,20); +INSERT 10 + +-- standby should only see 1...10 +-1S: select * from hs_t1; + a +---- + 5 + 6 + 9 + 10 + 2 + 3 + 4 + 7 + 8 + 1 +(10 rows) + +end; +END + +-- standby should see 1...20 now +-1S: select * from hs_t1; + a +---- + 2 + 3 + 4 + 7 + 8 + 16 + 18 + 19 + 1 + 12 + 15 + 20 + 5 + 6 + 9 + 10 + 11 + 13 + 14 + 17 +(20 rows) + +---------------------------------------------------------------- +-- Test: other things that a hot standby can do. +-- +-- More refer to regress test 'hs_standby_allowed'. +---------------------------------------------------------------- +-- set/reset and show GUC +-1S: set optimizer = on; +SET +-1S: show optimizer; + optimizer +----------- + on +(1 row) +-1S: reset optimizer; +RESET +-- copy command +-1S: copy hs_t1 to '/tmp/hs_copyto.csv' csv null ''; +COPY 20 +-- query catalogs +-1S: select count(*) from pg_class where relname = 'hs_t1'; + count +------- + 1 +(1 row) +-1S: select dbid,content,role,preferred_role,mode,status from gp_segment_configuration where dbid = current_setting('gp_dbid')::integer; + dbid | content | role | preferred_role | mode | status +------+---------+------+----------------+------+-------- + 8 | -1 | m | m | s | u +(1 row) +-- checkpoint is allowed on standby but a restart point is created instead +-1S: checkpoint; +CHECKPOINT + +---------------------------------------------------------------- +-- Test: things that can't be done on a hot standby: +-- no DML, DDL or anything that generates WAL. +-- +-- More refer to regress test 'hs_standby_disallowed'. +---------------------------------------------------------------- +-1S: insert into hs_t1 values(1); +ERROR: cannot execute INSERT in a read-only transaction +-1S: delete from hs_t1; +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: delete from hs_t1; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. +-1S: update hs_t1 set a = 0; +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: update hs_t1 set a = 0; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. 
+-1S: create table hs_t2(a int); +ERROR: cannot execute CREATE TABLE in a read-only transaction +-1S: create database hs_db; +ERROR: cannot execute CREATE DATABASE in a read-only transaction +-1S: vacuum hs_t1; +ERROR: cannot execute VACUUM during recovery + +-- +-- No hintbit WAL generation in SELECT. +-- +create table hs_nohintbit(a int) distributed by (a); +CREATE +insert into hs_nohintbit select generate_series (1, 10); +INSERT 10 +-- flush the data to disk +checkpoint; +CHECKPOINT + +-1S: set gp_disable_tuple_hints=off; +SET +-- no WAL is being generated (otherwise an error would occur "cannot make new WAL entries during recovery") +-1S: SELECT count(*) FROM hs_nohintbit; + count +------- + 10 +(1 row) + diff --git a/src/test/isolation2/expected/hot_standby/faults.out b/src/test/isolation2/expected/hot_standby/faults.out new file mode 100644 index 00000000000..39f3a06cca6 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/faults.out @@ -0,0 +1,326 @@ +-- Test system faults scenarios + +-- start_matchsubs +-- +-- m/Is the server running on host.*/ +-- s/Is the server running on host "\d+.\d+.\d+.\d+" and accepting/Is the server running on host and accepting/ +-- m/(seg\d+ \d+.\d+.\d+.\d+:\d+)/ +-- s/(.*)/(seg IP:PORT)/ +-- m/ERROR: connection to dbid 1 .*:7000 failed .*/ +-- s/ERROR: connection to dbid 1 .*:7000 failed .*/ERROR: connection to dbid 1 :7000 failed/ +-- +-- end_matchsubs + +-- Let FTS detect/declare failure sooner +!\retcode gpconfig -c gp_fts_probe_interval -v 10 --coordinatoronly; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +create table hs_failover(a int); +CREATE +insert into hs_failover select * from generate_series(1,10); +INSERT 10 +-1S: select * from hs_failover; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) + +---------------------------------------------------------------- +-- Mirror segment fails +---------------------------------------------------------------- +select pg_ctl(datadir, 'stop', 'immediate') from gp_segment_configuration where content=1 and role = 'm'; + pg_ctl +-------- + OK +(1 row) + +-- make sure mirror is detected down +create temp table hs_tt(a int); +CREATE +select gp_request_fts_probe_scan(); + gp_request_fts_probe_scan +--------------------------- + t +(1 row) + +-- will not succeed +-1S: select * from hs_failover; +ERROR: Error on receive from seg1 slice1 127.0.1.1:7006 pid=26942: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... 
+ +-- recovery +!\retcode gprecoverseg -aF; +(exited with code 0) + +-- sync-up +select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) + +-- works now +-1S: select * from hs_failover; + a +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) + +---------------------------------------------------------------- +-- Primary segment fails +---------------------------------------------------------------- +-- inject a fault where the mirror gets out of recovery +select gp_inject_fault('out_of_recovery_in_startupxlog', 'skip', dbid) from gp_segment_configuration where content = 1 and role = 'm'; + gp_inject_fault +----------------- + Success: +(1 row) + +select pg_ctl(datadir, 'stop', 'immediate') from gp_segment_configuration where content=1 and role = 'p'; + pg_ctl +-------- + OK +(1 row) +select gp_request_fts_probe_scan(); + gp_request_fts_probe_scan +--------------------------- + t +(1 row) + +-- make sure failover happens +select dbid, content, role, preferred_role, mode, status from gp_segment_configuration where content = 1; + dbid | content | role | preferred_role | mode | status +------+---------+------+----------------+------+-------- + 3 | 1 | m | p | n | d + 6 | 1 | p | m | n | u +(2 rows) +select gp_wait_until_triggered_fault('out_of_recovery_in_startupxlog', 1, dbid) from gp_segment_configuration where content = 1 and role = 'p'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_segment_configuration where content = 1 and role = 'p'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- On an existing standby connection, query will run but it is dispatched to the previous mirror +-- in an existing gang. That mirror is now a primary, so it will complain and the query fails. +-1S: select * from hs_failover; +ERROR: primary segments can only process MPP protocol messages from primary QD (seg1 slice1 127.0.1.1:7006 pid=14671) +HINT: Exit the current session and re-connect. +-1Sq: ... + +-- will fail due to downed mirror (previous primary) +-1S: select * from hs_failover; +ERROR: failed to acquire resources on one or more segments +DETAIL: connection to server at "10.13.9.74", port 7003 failed: Connection refused + Is the server running on that host and accepting TCP/IP connections? + (seg1 10.13.9.74:7003) +-1Sq: ... + +-- bring the downed mirror up +!\retcode gprecoverseg -aF; +(exited with code 0) +select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) + +-- mirror is up +-1S: select dbid, content, role, preferred_role, mode, status from gp_segment_configuration where content = 1; + dbid | content | role | preferred_role | mode | status +------+---------+------+----------------+------+-------- + 6 | 1 | p | m | s | u + 3 | 1 | m | p | s | u +(2 rows) + +-- now the query will succeed +-1S: select * from hs_failover; + a +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) +-1Sq: ... 
+ +-- re-balance, bring the segments to their preferred roles +!\retcode gprecoverseg -ar; +(exited with code 0) +select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) +-1S: select dbid, content, role, preferred_role, mode, status from gp_segment_configuration where content = 1; + dbid | content | role | preferred_role | mode | status +------+---------+------+----------------+------+-------- + 3 | 1 | p | p | s | u + 6 | 1 | m | m | s | u +(2 rows) + +-- query runs fine still +-1S: select * from hs_failover; + a +---- + 5 + 6 + 9 + 10 + 1 + 2 + 3 + 4 + 7 + 8 +(10 rows) + +---------------------------------------------------------------- +-- DTX recovery +---------------------------------------------------------------- +-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +1: create table tt_hs_dtx(a int); +CREATE + +-- inject fault to repeatedly fail the COMMIT PREPARE phase of 2PC, which ensures that the dtx cannot finish even by the dtx recovery process. +select gp_inject_fault_infinite('finish_commit_prepared', 'error', dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) + +-- session 1 on primary QD tries to commit a DTX, but cannot finish due to the fault on a QE +1&: insert into tt_hs_dtx select * from generate_series(1,10); + +-- inject a panic on primary QD, essentially restarts the primary QD +2: select gp_inject_fault('before_read_command', 'panic', dbid) from gp_segment_configuration where content=-1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +2: select 1; +PANIC: fault triggered, fault name:'before_read_command' fault type:'panic' +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + +1<: <... completed> +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +1q: ... +2q: ... 
+ +-- standby QD can still run query +-1S: select * from hs_failover; + a +---- + 1 + 10 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) +-- it cannot see rows from the in-doubt DTX +-1S: select * from tt_hs_dtx; + a +--- +(0 rows) + +-- let the failed dtx be recovered, also make sure the standby replays the forget record which signals the completion of the dtx +-1S: select gp_inject_fault('redoDistributedForgetCommitRecord', 'skip', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S: select gp_inject_fault_infinite('finish_commit_prepared', 'reset', dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +-1S: select gp_wait_until_triggered_fault('redoDistributedForgetCommitRecord', 1, dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +-1S: select gp_inject_fault('redoDistributedForgetCommitRecord', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- standby should see the rows from the in-doubt DTX now +-1S: select * from tt_hs_dtx; + a +---- + 1 + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 +(10 rows) + +-1S: select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) +1: select gp_inject_fault('before_read_command', 'reset', dbid) from gp_segment_configuration where content=-1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault +----------------- + Success: +(1 row) + diff --git a/src/test/isolation2/expected/hot_standby/setup.out b/src/test/isolation2/expected/hot_standby/setup.out new file mode 100644 index 00000000000..f8f1e02fe40 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/setup.out @@ -0,0 +1,14 @@ +-- setup for hot standby tests +!\retcode gpconfig -c hot_standby -v on; +(exited with code 0) +-- let primary wait for standby to apply changes, make test less flaky +!\retcode gpconfig -c synchronous_commit -v remote_apply; +(exited with code 0) +-- make it faster to handle query conflict +!\retcode gpconfig -c max_standby_streaming_delay -v 1000; +(exited with code 0) +-- disable autovacuum, to not affect the manual VACUUM in the tests +!\retcode gpconfig -c autovacuum -v off; +(exited with code 0) +!\retcode gpstop -ar; +(exited with code 0) diff --git a/src/test/isolation2/expected/hot_standby/teardown.out b/src/test/isolation2/expected/hot_standby/teardown.out new file mode 100644 index 00000000000..8b4e1271610 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/teardown.out @@ -0,0 +1,9 @@ +-- reset the setup for hot standby tests +!\retcode gpconfig -r hot_standby; +(exited with code 0) +!\retcode gpconfig -r synchronous_commit; +(exited with code 0) +!\retcode gpconfig -r max_standby_streaming_delay; +(exited with code 0) +!\retcode gpstop -ar; +(exited with code 0) diff --git a/src/test/isolation2/expected/hot_standby/transaction_isolation.out b/src/test/isolation2/expected/hot_standby/transaction_isolation.out new file mode 100644 index 00000000000..3990bd7cd56 --- /dev/null +++ b/src/test/isolation2/expected/hot_standby/transaction_isolation.out @@ -0,0 +1,984 @@ 
+---------------------------------------------------------------- +-- Test transaction isolation in general, not specific to dtx +---------------------------------------------------------------- +1: create table hs_tx(a int); +CREATE +1: insert into hs_tx select * from generate_series(1,10); +INSERT 10 + +1: begin; +BEGIN +1: insert into hs_tx select * from generate_series(11,20); +INSERT 10 +2: begin; +BEGIN +2: insert into hs_tx select * from generate_series(21,30); +INSERT 10 +2: abort; +ABORT + +-- standby should only see completed transactions, not in-progress transactions, nor aborted transactions +-1S: select * from hs_tx; + a +---- + 1 + 5 + 6 + 9 + 10 + 2 + 3 + 4 + 7 + 8 +(10 rows) + +1: end; +END +-1S: select * from hs_tx; + a +---- + 2 + 3 + 4 + 7 + 8 + 16 + 18 + 19 + 1 + 12 + 15 + 20 + 5 + 6 + 9 + 10 + 11 + 13 + 14 + 17 +(20 rows) + +---------------------------------------------------------------- +-- Test isolation between hot standby query and in-progress dtx +---------------------------------------------------------------- + +1: create table hs_dtx1(a int); +CREATE +1: create table hs_dtx2(a int); +CREATE + +-- inject two suspend faults: +-- 1. on seg0, suspend before PREPARE phase of 2PC +1: select gp_inject_fault('qe_start_prepared', 'suspend',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1&: insert into hs_dtx1 select * from generate_series(1,10); +-- 2. on seg1, suspend before COMMIT phase of 2PC +2: select gp_inject_fault('qe_start_commit_prepared', 'suspend',dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +2&: insert into hs_dtx2 select * from generate_series(1,10); + +-- standby should not see any rows from either dtx +-1S: select * from hs_dtx1; + a +--- +(0 rows) +-1S: select * from hs_dtx2; + a +--- +(0 rows) + +-- reset +3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... completed> +INSERT 10 +2<: <... completed> +INSERT 10 + +-- standby should see the results from the dtx now +-1S: select * from hs_dtx1; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) +-1S: select * from hs_dtx2; + a +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) + +---------------------------------------------------------------- +-- Test DTX abort that happens in different phases +---------------------------------------------------------------- + +1: create table hs_abort_dtx1(a int); +CREATE +1: create table hs_abort_dtx2(a int); +CREATE + +-- inject two errors: +-- 1. on seg0, error out before PREPARE phase of 2PC +1: select gp_inject_fault('qe_start_prepared', 'error', dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1: insert into hs_abort_dtx1 select * from generate_series(1,10); +ERROR: fault triggered, fault name:'qe_start_prepared' fault type:'error' (seg0 127.0.1.1:7002 pid=343) +1: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +-- 2. 
on seg1, error out before COMMIT phase of 2PC +1: select gp_inject_fault('qe_start_commit_prepared', 'error', dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1: insert into hs_abort_dtx2 select * from generate_series(1,10); +INSERT 10 +1: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- standby should not see dtx1 which is aborted but should see dtx2 which is recovered +-1S: select * from hs_abort_dtx1; + a +--- +(0 rows) +-1S: select * from hs_abort_dtx2; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) + +---------------------------------------------------------------- +-- Test isolation between hot standby query and in-progress dtx, +-- but also run more queries in between +---------------------------------------------------------------- +1: create table hs_dtx3(a int); +CREATE + +-- inject faults to suspend segments in 2PC +1: select gp_inject_fault('qe_start_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1&: insert into hs_dtx3 select * from generate_series(1,10); +2: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +2&: insert into hs_dtx3 select * from generate_series(11,20); + +-- standby should not see rows in the in-progress dtx +-1S: select * from hs_dtx3; + a +--- +(0 rows) + +-- now run some dtx and completed +3: insert into hs_dtx3 values(99); +INSERT 1 +3: create table hs_dtx4(a int); +CREATE +3: insert into hs_dtx4 select * from generate_series(1,10); +INSERT 10 + +-- standby should still not see rows in the in-progress DTX, but should see the completed ones +-1S: select * from hs_dtx3; + a +---- + 99 +(1 row) +-1S: select * from hs_dtx4; + a +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) + +3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... completed> +INSERT 10 +2<: <... completed> +INSERT 10 + +-- standby should see all rows now +-1S: select * from hs_dtx3; + a +---- + 1 + 12 + 15 + 20 + 2 + 3 + 4 + 7 + 8 + 16 + 18 + 19 + 99 + 5 + 6 + 9 + 10 + 11 + 13 + 14 + 17 +(21 rows) + +---------------------------------------------------------------- +-- Test isolation between standby QD and in-progress dtx, +-- but after standby QD resets and gets running DTX from checkpoint. +---------------------------------------------------------------- +1: create table hs_t5(a int, b text); +CREATE +1: create table hs_t6(a int, b text); +CREATE + +-- inject fault to suspend a primary right before it conducts the commit phase of 2PC, +-- so in the subsequent INSERT, all local transactions will be committed but the dtx is not. 
+1: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1&: insert into hs_t5 select i, 'in-progress' from generate_series(1,10) i; + +-- now run some dtx and completed, and primary conducts a checkpoint +2: insert into hs_t5 values(1, 'commited'); +INSERT 1 +2: insert into hs_t6 select i, 'committed' from generate_series(1,10) i; +INSERT 10 +2: begin; +BEGIN +2: insert into hs_t5 values(99, 'aborted'); +INSERT 1 +2: abort; +ABORT +2: checkpoint; +CHECKPOINT + +-- now make the standby QD resets itself +-1S: select gp_inject_fault('exec_simple_query_start', 'panic', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S: select 1; +PANIC: fault triggered, fault name:'exec_simple_query_start' fault type:'panic' +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... + +-- standby should still not see rows in the in-progress DTX, but should see the completed ones +-1S: select * from hs_t5; + a | b +---+---------- + 1 | commited +(1 row) +-1S: select * from hs_t6; + a | b +----+----------- + 1 | committed + 2 | committed + 3 | committed + 4 | committed + 7 | committed + 8 | committed + 5 | committed + 6 | committed + 9 | committed + 10 | committed +(10 rows) + +2: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... completed> +INSERT 10 + +-- standby should see all rows now +-1S: select * from hs_t5; + a | b +----+------------- + 1 | in-progress + 1 | commited + 5 | in-progress + 6 | in-progress + 9 | in-progress + 10 | in-progress + 2 | in-progress + 3 | in-progress + 4 | in-progress + 7 | in-progress + 8 | in-progress +(11 rows) +-1S: select * from hs_t6; + a | b +----+----------- + 5 | committed + 6 | committed + 9 | committed + 10 | committed + 1 | committed + 2 | committed + 3 | committed + 4 | committed + 7 | committed + 8 | committed +(10 rows) + +-- standby should correctly see more in-progress dtx on the primary. +-- context: previously this would be fail because the standby updates latestCompletedGxid to the +-- bumped nextGxid from checkpoint, which is too far (so that it thinks the new dtx already completed). 
+1: select gp_inject_fault('qe_start_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +1&: delete from hs_t5; +2: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +2&: delete from hs_t6; + +-- standby should not see the effect of the deletes +-1S: select * from hs_t5; + a | b +----+------------- + 2 | in-progress + 3 | in-progress + 4 | in-progress + 7 | in-progress + 8 | in-progress + 1 | in-progress + 1 | commited + 5 | in-progress + 6 | in-progress + 9 | in-progress + 10 | in-progress +(11 rows) +-1S: select * from hs_t6; + a | b +----+----------- + 1 | committed + 2 | committed + 3 | committed + 4 | committed + 7 | committed + 8 | committed + 5 | committed + 6 | committed + 9 | committed + 10 | committed +(10 rows) + +3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) +3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; + gp_inject_fault +----------------- + Success: +(1 row) + +1<: <... completed> +DELETE 11 +2<: <... completed> +DELETE 10 + +-- standby now see those deletes +-1S: select * from hs_t5; + a | b +---+--- +(0 rows) +-1S: select * from hs_t6; + a | b +---+--- +(0 rows) + +---------------------------------------------------------------- +-- Read-committed isolation: query on hot standby should not see dtx that completed after it +-- created distributed snapshot, but should see dtx that completed before that. +---------------------------------------------------------------- + +1: create table hs_rc(a int); +CREATE +1: insert into hs_rc select * from generate_series(1,10); +INSERT 10 + +-- case 1: suspend SELECT on the standby QD right after it created snapshot +-1S: select gp_inject_fault('select_after_qd_create_snapshot', 'suspend', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S&: select * from hs_rc; + +-- new INSERT or DELETE won't be observed by the standby +1: insert into hs_rc select * from generate_series(11,20); +INSERT 10 +1: delete from hs_rc where a < 5; +DELETE 4 +1: select gp_inject_fault('select_after_qd_create_snapshot', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- should only see the rows at the time when SELECT started (1...10). +-1S<: <... 
completed> + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) + +-- SELECT again, should see the effect from the INSERT and DELETE now +-1S: select * from hs_rc; + a +---- + 12 + 15 + 20 + 7 + 8 + 16 + 18 + 19 + 5 + 6 + 9 + 10 + 11 + 13 + 14 + 17 +(16 rows) + +-- case 2: suspend SELECT on the standby QD before creating snapshot +-1S: select gp_inject_fault('select_before_qd_create_snapshot', 'suspend', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S&: select * from hs_rc; + +1: insert into hs_rc select * from generate_series(21,30); +INSERT 10 +1: delete from hs_rc where a < 21; +DELETE 16 +1: select gp_inject_fault('select_before_qd_create_snapshot', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- standby should see the effect of the INSERT and DELETE +-1S<: <... completed> + a +---- + 23 + 26 + 30 + 22 + 24 + 27 + 29 + 21 + 25 + 28 +(10 rows) + +---------------------------------------------------------------- +-- Read-committed isolation in the BEGIN...END block +---------------------------------------------------------------- + +1: truncate hs_rc; +TRUNCATE +1: insert into hs_rc select * from generate_series(1,30); +INSERT 30 + +-1S: begin; +BEGIN +-1S: select count(*) from hs_rc; + count +------- + 30 +(1 row) + +-- have some concurrent sessions on primary QD: +-- 1. a completed transaction +1: delete from hs_rc where a <= 10; +DELETE 10 +-- 3. an aborted transaction +2: begin; +BEGIN +2: delete from hs_rc where a > 10 and a <= 20; +DELETE 10 +2: abort; +ABORT +-- 3. an ongoing transaction +3: begin; +BEGIN +3: delete from hs_rc where a > 20 and a <= 30; +DELETE 10 + +-- the standby should see results accordingly +-1S: select * from hs_rc; + a +---- + 12 + 15 + 20 + 23 + 26 + 30 + 11 + 13 + 14 + 17 + 21 + 25 + 28 + 16 + 18 + 19 + 22 + 24 + 27 + 29 +(20 rows) +-1S: end; +END + +3: end; +END +-1S: select * from hs_rc; + a +---- + 12 + 15 + 20 + 11 + 13 + 14 + 17 + 16 + 18 + 19 +(10 rows) + +---------------------------------------------------------------- +-- Repeatable-read isolation: distributed snapshot is created at time of the +-- first query in transaction block. All queries in the transaction block +-- should only see results committed before the distributed snapshot creation. 
+---------------------------------------------------------------- + +1: create table hs_rr(a int); +CREATE +1: insert into hs_rr select * from generate_series(1,10); +INSERT 10 + +-1S: begin isolation level repeatable read; +BEGIN +-- should see 10 +-1S: select count(*) from hs_rr; + count +------- + 10 +(1 row) + +-- do some more INSERT, DELETE and UPDATE +1: insert into hs_rr select * from generate_series(11,20); +INSERT 10 +1: delete from hs_rr where a <= 10; +DELETE 10 +1: update hs_rr set a = a + 100; +UPDATE 10 + +-- should still the initial rows {1...10} +-1S: select * from hs_rr; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) +-1S: end; +END + +-- should see the results from the INSERT, DELETE and UPDATE +-1S: begin isolation level repeatable read; +BEGIN +-1S: select * from hs_rr; + a +----- + 115 + 120 + 118 + 113 + 114 + 112 + 116 + 119 + 111 + 117 +(10 rows) + +-- standby won't see ongoing or aborted transactions either +1: begin; +BEGIN +1: insert into hs_rr select * from generate_series(1,10); +INSERT 10 +2: begin; +BEGIN +2: insert into hs_rr select * from generate_series(1,10); +INSERT 10 +2: abort; +ABORT + +-1S: select * from hs_rr; + a +----- + 114 + 115 + 120 + 118 + 113 + 112 + 116 + 119 + 111 + 117 +(10 rows) + +1: end; +END +-1S: end; +END + +---------------------------------------------------------------- +-- Transaction isolation is respected in subtransactions too +---------------------------------------------------------------- + +1: create table hs_subtrx(a int); +CREATE + +-- (1) read-committed +-1S: begin; +BEGIN +-1S: select count(*) from hs_subtrx; + count +------- + 0 +(1 row) +-1S: savepoint s1; +SAVEPOINT + +1: insert into hs_subtrx select * from generate_series(1,10); +INSERT 10 + +-1S: select count(*) from hs_subtrx; + count +------- + 10 +(1 row) +-1S: savepoint s2; +SAVEPOINT +-1S: select count(*) from hs_subtrx; + count +------- + 10 +(1 row) +-1S: rollback to savepoint s1; +ROLLBACK +-1S: select count(*) from hs_subtrx; + count +------- + 10 +(1 row) +-1S: end; +END + +-- (2) repeatable-read +-1S: begin isolation level repeatable read; +BEGIN +-1S: select * from hs_subtrx; + a +---- + 1 + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 +(10 rows) +-1S: savepoint s1; +SAVEPOINT + +1: insert into hs_subtrx select * from generate_series(11,20); +INSERT 10 +1: delete from hs_subtrx where a <= 10; +DELETE 10 +1: update hs_subtrx set a = a + 100; +UPDATE 10 + +-1S: select * from hs_subtrx; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) +-1S: savepoint s2; +SAVEPOINT +-1S: select * from hs_subtrx; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) +-1S: rollback to savepoint s1; +ROLLBACK +-1S: select * from hs_subtrx; + a +---- + 2 + 3 + 4 + 7 + 8 + 1 + 5 + 6 + 9 + 10 +(10 rows) +-1S: end; +END +-1S: select * from hs_subtrx; + a +----- + 114 + 115 + 120 + 118 + 113 + 112 + 116 + 119 + 111 + 117 +(10 rows) + +---------------------------------------------------------------- +-- Various isolation tests that involve AO/CO table. 
+---------------------------------------------------------------- +1: create table hs_ao(a int, id int unique) using ao_row; +CREATE +1: insert into hs_ao select 1,i from generate_series(1,10) i; +INSERT 10 +1: begin; +BEGIN +1: insert into hs_ao select 2,i from generate_series(11,20) i; +INSERT 10 + +-- standby sees the same AO metadata as primary +2: select * from gp_toolkit.__gp_aoseg('hs_ao'); + segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state +------------+-------+-----+----------+---------------+------------------+----------+---------------+------- + 0 | 1 | 128 | 5 | 1 | 128 | 1 | 3 | 1 + 1 | 1 | 40 | 1 | 1 | 40 | 1 | 3 | 1 + 2 | 1 | 104 | 4 | 1 | 104 | 1 | 3 | 1 +(3 rows) +-1S: select * from gp_toolkit.__gp_aoseg('hs_ao'); + segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state +------------+-------+-----+----------+---------------+------------------+----------+---------------+------- + 0 | 1 | 128 | 5 | 1 | 128 | 1 | 3 | 1 + 1 | 1 | 40 | 1 | 1 | 40 | 1 | 3 | 1 + 2 | 1 | 104 | 4 | 1 | 104 | 1 | 3 | 1 +(3 rows) +2: select (gp_toolkit.__gp_aoblkdir('hs_ao')).* from gp_dist_random('gp_id'); + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 4 + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 + (0,2) | 1 | 0 | 0 | 1 | 0 | 5 +(3 rows) +-1S: select (gp_toolkit.__gp_aoblkdir('hs_ao')).* from gp_dist_random('gp_id'); + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 5 + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 + (0,2) | 1 | 0 | 0 | 1 | 0 | 4 +(3 rows) + +-- standby sees correct table data +-1S: select * from hs_ao; + a | id +---+---- + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 7 + 1 | 8 + 1 | 1 + 1 | 5 + 1 | 6 + 1 | 9 + 1 | 10 +(10 rows) + +-- standby sees the effect of vacuum +1: end; +END +1: delete from hs_ao where a = 1; +DELETE 10 +1: vacuum hs_ao; +VACUUM +1: select * from gp_toolkit.__gp_aoseg('hs_ao'); + segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state +------------+-------+-----+----------+---------------+------------------+----------+---------------+------- + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 2 | 2 | 104 | 4 | 1 | 104 | 0 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 0 | 2 | 88 | 3 | 1 | 88 | 0 | 3 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 1 | 2 | 88 | 3 | 1 | 88 | 0 | 3 | 1 +(6 rows) +-1S: select * from gp_toolkit.__gp_aoseg('hs_ao'); + segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state +------------+-------+-----+----------+---------------+------------------+----------+---------------+------- + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 2 | 2 | 104 | 4 | 1 | 104 | 0 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 0 | 2 | 88 | 3 | 1 | 88 | 0 | 3 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 1 | 2 | 88 | 3 | 1 | 88 | 0 | 3 | 1 +(6 rows) +-1S: select * from hs_ao; + a | id +---+---- + 2 | 11 + 2 | 13 + 2 | 14 + 2 | 17 + 2 | 12 + 2 | 15 + 2 | 20 + 2 | 16 + 2 | 18 + 2 | 19 +(10 rows) diff --git a/src/test/isolation2/hot_standby_schedule b/src/test/isolation2/hot_standby_schedule new file mode 100644 index 00000000000..73e0f71a84c --- /dev/null +++ b/src/test/isolation2/hot_standby_schedule @@ -0,0 +1,6 @@ +test: 
hot_standby/setup +test: hot_standby/basic +test: hot_standby/transaction_isolation +test: hot_standby/query_conflict +test: hot_standby/faults +test: hot_standby/teardown diff --git a/src/test/isolation2/input/hot_standby/query_conflict.source b/src/test/isolation2/input/hot_standby/query_conflict.source new file mode 100644 index 00000000000..5f2aee3be53 --- /dev/null +++ b/src/test/isolation2/input/hot_standby/query_conflict.source @@ -0,0 +1,225 @@ +-- Tests for query conflict detection and cancellation on the hot standby. + +---------------------------------------------------------------- +-- Various query conflcit cases for hot standy. +-- +-- All cases are written in this pattern: +-- 1. Start a standby transaction that will be conflicted and cancelled; +-- 2. Start a primary transaction that will conflict it; +-- 3. Commit the primary transaction. Since we are using remote_apply, it will +-- wait until the WAL is applied on the standby, which would happen only +-- after the standby query is cancelled; +-- 4. Run something on the standby transaction and see the conflict error, which +-- in some cases it's ERROR, in others it's FATAL. +-- 5. Quit, establish a new connection, and re-run +-- 6. Check the system view gp_stat_database_conflicts to see that the conflict +-- has been recorded. Note that we print the max count among all segments +-- to avoid flakiness. +-- See https://www.postgresql.org/docs/12/hot-standby.html#HOT-STANDBY-CONFLICT for more details. +---------------------------------------------------------------- + +-- We assume we start the test with clean records +-1S: select max(confl_tablespace), max(confl_lock), max(confl_snapshot), max(confl_bufferpin), max(confl_deadlock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +--------------------------------------------------------------------- +-- Conflict with explicit lock +--------------------------------------------------------------------- +create table hs_qc_lock(a int); +insert into hs_qc_lock select * from generate_series(1,5); +-1S: begin; +-1S: select * from hs_qc_lock; +1: begin; +1: lock table hs_qc_lock in access exclusive mode; +1: end; +-1S: select * from hs_qc_lock; +-1Sq: +-1S: select * from hs_qc_lock; +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +--------------------------------------------------------------------- +-- Conflict with implicit lock +--------------------------------------------------------------------- +-1S: begin; +-1S: select * from hs_qc_lock; +1: alter table hs_qc_lock set access method ao_row; +-1S: select * from hs_qc_lock; +-1Sq: +-1S: select * from hs_qc_lock; +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +--------------------------------------------------------------------- +-- Conflict with drop database +--------------------------------------------------------------------- +1: create database hs_qc_dropdb; +-1Sq: +-1S:@db_name hs_qc_dropdb: select 1; +1: drop database hs_qc_dropdb; +-1S: select 1; +-1Sq: +-- Stats aren't counted for database conflicts. 
See: pgstat_recv_recoveryconflict + +--------------------------------------------------------------------- +-- Conflict with VACUUM (snapshot) +--------------------------------------------------------------------- +1: create table hs_qc_vac1(a int); +1: insert into hs_qc_vac1 select * from generate_series(1,10); +-1S: begin transaction isolation level repeatable read; +-1S: select count(*) from hs_qc_vac1; +1: delete from hs_qc_vac1; +1: vacuum hs_qc_vac1; +-1S: select count(*) from hs_qc_vac1; +-1Sq: +-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +--------------------------------------------------------------------- +-- Conflict with VACUUM (buffer pin) +-- VACUUM of page that the standby is still holding buffer pin on, the difference with +-- the previous case is that here the deleted row is already invisible to the standby. +--------------------------------------------------------------------- +1: create table hs_qc_vac2(a int); +1: insert into hs_qc_vac2 values(2); +1: delete from hs_qc_vac2; +-- run select once on the standby, so the next select will fetch data from buffer +-1S: select * from hs_qc_vac2; +-- suspend the standby at where it just unlocks the buffer but still holds the pin +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'suspend','','','hs_qc_vac2',1,1,0,dbid) from gp_segment_configuration where content=0 and role='m'; +-- we'll also make sure the startup process has sent out the signal before we let the standby backend release the pin +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'skip',dbid) from gp_segment_configuration where content=0 and role='m'; +-1S&: select * from hs_qc_vac2; +1: vacuum hs_qc_vac2; +-- as mentioned before, make sure startup process has sent the signal, and then let the standby proceed +1: select gp_wait_until_triggered_fault('recovery_conflict_bufferpin_signal_sent', 1,dbid) from gp_segment_configuration where content=0 and role='m'; +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; +-- should see the conflict +-1S<: +-1Sq: +-- XXX: sometimes it shows the number is 2 instead of 1. It still validates the test but it would be nice to know why. +-1S: select max(confl_bufferpin) > 0 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +--------------------------------------------------------------------- +-- Conflict with drop (temp) tablespace +-- Note: regular user tablespaces won't cause conflict on the standby since the standby cannot create any objects under them. 
+--------------------------------------------------------------------- +-- create tablespace +!\retcode mkdir -p @testtablespace@/hs_tablespace_directory; +create tablespace hs_ts location '@testtablespace@/hs_tablespace_directory'; + +-- some prepartion on the primary +create table hs_ts_foo (i int, j int) distributed by(i); +insert into hs_ts_foo select i, i from generate_series(1,800000)i; +analyze hs_ts_foo; + +-- make sure the standby won't run too fast and delete the temp files +select gp_inject_fault('after_open_temp_file', 'suspend',dbid) from gp_segment_configuration where content=1 and role='m'; + +-- on the standby, run some query that requires workfile, this example is taken +-- from regress/temp_tablespaces test +-1S: set temp_tablespaces = hs_ts; +-1S: set default_tablespace = hs_ts; +-1S: set statement_mem='2MB'; +-1S&: with a1 as (select * from hs_ts_foo), a2 as (select * from hs_ts_foo) select a1.i xx from a1 inner join a2 on a2.i = a1.i union all select count(a1.i) from a1 inner join a2 on a2.i = a1.i order by xx limit 5; + +-- drop tablespace, should see conflict on the hot standby +drop tablespace hs_ts; +select gp_inject_fault('after_open_temp_file', 'reset',dbid) from gp_segment_configuration where content=1 and role='m'; +-1S<: +-1Sq: + +-- conflict has been recorded. The query has multiple slices +-1S: select max(confl_tablespace) >= 1 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +-- cleanup +!\retcode rm -rf @testtablespace@/hs_tablespace_directory; +-- Do one checkpoint. Otherwise if server restarts w/o doing checkpoint (some subsequent +-- tests might do that), the server would complain it cannot find the directory for hs_ts. +checkpoint; + +---------------------------------------------------------------- +-- Additional case to show that distributed transaction is not taken into +-- account w/o the help of restore-point-based distributed snapshot creation. +---------------------------------------------------------------- + +1: create table hs_qc_ds1(a int); +1: insert into hs_qc_ds1 select * from generate_series(1,10); +-- standby starts a repeatable read transaction, runs a local query that +-- creates a distributed snapshot w/o creating QE. +-1S: select count(*) from hs_qc_ds1; +-1S: begin transaction isolation level repeatable read; +-1S: select relname from pg_class where relname = 'hs_qc_ds1'; +-- primary runs VACUUM +1: delete from hs_qc_ds1; +1: vacuum hs_qc_ds1; +-- The standby query in theory should be cancelled, because it started before +-- the VACUUM. But in reality, it doesn't, and sees 0 rows, because the QE for the +-- SELECT below will create more recent local snapshot that does not conflict with +-- the VACUUM, and sees the result of DELETE+VACUUM. +-- Note: with the help of restore point, we would be able to create local snapshot +-- precisely corresponding to each distributed snapshot, and do conflict detection accordingly. 
+-1S: select count(*) from hs_qc_ds1; +-1S: end; + +---------------------------------------------------------------- +-- Test GUC hot_standby_feedback +---------------------------------------------------------------- +!\retcode gpconfig -c hot_standby_feedback -v on; +!\retcode gpstop -u; + +1: create table hs_qc_guc1(a int); +1: insert into hs_qc_guc1 select * from generate_series(1,10); + +-1S: begin transaction isolation level repeatable read; +-1S: select * from hs_qc_guc1; + +-- VACUUM won't cleanup this table since the standby still sees it +1: delete from hs_qc_guc1; +1: vacuum hs_qc_guc1; + +-- hot standby can still see those rows +-1S: select * from hs_qc_guc1; + +-- after the conflicting read transaction ends, the next VACUUM will successfully vacuum the table +-1S: end; +1: vacuum hs_qc_guc1; +-1S: select * from hs_qc_guc1; +-1Sq: + +!\retcode gpconfig -r hot_standby_feedback; +!\retcode gpstop -u; + +---------------------------------------------------------------- +-- Test GUC vacuum_defer_cleanup_age +---------------------------------------------------------------- +-- Use a GUC value that's not 0, so VACUUM does not clean up +-- recent dead rows that the hot standby might be still seeing. +!\retcode gpconfig -c vacuum_defer_cleanup_age -v 1; +!\retcode gpstop -u; + +1: create table hs_qc_guc2(a int); +1: insert into hs_qc_guc2 select * from generate_series(1,10); + +-1S: begin transaction isolation level repeatable read; +-1S: select count(*) from hs_qc_guc2; + +-- VACUUM won't cleanup this table since the DELETE is still within vacuum_defer_cleanup_age +1: delete from hs_qc_guc2; +1: vacuum hs_qc_guc2; + +-- showing all rows are deleted but not vacuumed +1: select count(*) from hs_qc_guc2; +1: set gp_select_invisible to on; +1: select count(*) from hs_qc_guc2; + +-- hot standby can still query the table +-1S: select count(*) from hs_qc_guc2; + +-- only if the age is reached, hot standby will see the same conflict as before +1: create temp table tt1(a int); +1: vacuum hs_qc_guc2; +-1S: select count(*) from hs_qc_guc2; +-1Sq: +-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + +!\retcode gpconfig -r vacuum_defer_cleanup_age; +!\retcode gpstop -u; + diff --git a/src/test/isolation2/output/hot_standby/query_conflict.source b/src/test/isolation2/output/hot_standby/query_conflict.source new file mode 100644 index 00000000000..909d2532df3 --- /dev/null +++ b/src/test/isolation2/output/hot_standby/query_conflict.source @@ -0,0 +1,470 @@ +-- Tests for query conflict detection and cancellation on the hot standby. + +---------------------------------------------------------------- +-- Various query conflcit cases for hot standy. +-- +-- All cases are written in this pattern: +-- 1. Start a standby transaction that will be conflicted and cancelled; +-- 2. Start a primary transaction that will conflict it; +-- 3. Commit the primary transaction. Since we are using remote_apply, it will +-- wait until the WAL is applied on the standby, which would happen only +-- after the standby query is cancelled; +-- 4. Run something on the standby transaction and see the conflict error, which +-- in some cases it's ERROR, in others it's FATAL. +-- 5. Quit, establish a new connection, and re-run +-- 6. Check the system view gp_stat_database_conflicts to see that the conflict +-- has been recorded. Note that we print the max count among all segments +-- to avoid flakiness. 
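As an aside on step 6 of the pattern above: gp_stat_database_conflicts appears to be the cluster-wide counterpart of the stock per-database conflict view, which is why the tests take max() across segments. A minimal, hand-run sketch of the underlying counters on a single standby instance (standard PostgreSQL names, outside the isolation2 harness):

-- per-database recovery-conflict counters on this standby instance
select datname,
       confl_tablespace,   -- cancelled because a tablespace in use was dropped
       confl_lock,         -- cancelled after waiting too long for a relation lock
       confl_snapshot,     -- cancelled because needed row versions were vacuumed away
       confl_bufferpin,    -- cancelled while pinning a buffer the startup process needs
       confl_deadlock      -- cancelled to resolve a deadlock with the startup process
from pg_stat_database_conflicts
where datname = current_database();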
+-- See https://www.postgresql.org/docs/12/hot-standby.html#HOT-STANDBY-CONFLICT for more details. +---------------------------------------------------------------- + +-- We assume we start the test with clean records +-1S: select max(confl_tablespace), max(confl_lock), max(confl_snapshot), max(confl_bufferpin), max(confl_deadlock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max | max | max | max | max +-----+-----+-----+-----+----- + 0 | 0 | 0 | 0 | 0 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with explicit lock +--------------------------------------------------------------------- +create table hs_qc_lock(a int); +CREATE +insert into hs_qc_lock select * from generate_series(1,5); +INSERT 5 +-1S: begin; +BEGIN +-1S: select * from hs_qc_lock; + a +--- + 2 + 3 + 4 + 1 + 5 +(5 rows) +1: begin; +BEGIN +1: lock table hs_qc_lock in access exclusive mode; +LOCK +1: end; +END +-1S: select * from hs_qc_lock; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was holding a relation lock for too long. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... +-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 1 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with implicit lock +--------------------------------------------------------------------- +-1S: begin; +BEGIN +-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +1: alter table hs_qc_lock set access method ao_row; +ALTER +-1S: select * from hs_qc_lock; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was holding a relation lock for too long. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... +-1S: select * from hs_qc_lock; + a +--- + 1 + 5 + 2 + 3 + 4 +(5 rows) +-1S: select max(confl_lock) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 2 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with drop database +--------------------------------------------------------------------- +1: create database hs_qc_dropdb; +CREATE +-1Sq: ... +-1S:@db_name hs_qc_dropdb: select 1; + ?column? +---------- + 1 +(1 row) +1: drop database hs_qc_dropdb; +DROP +-1S: select 1; +FATAL: terminating connection due to conflict with recovery +DETAIL: User was connected to a database that must be dropped. +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-1Sq: ... +-- Stats aren't counted for database conflicts. 
See: pgstat_recv_recoveryconflict + +--------------------------------------------------------------------- +-- Conflict with VACUUM (snapshot) +--------------------------------------------------------------------- +1: create table hs_qc_vac1(a int); +CREATE +1: insert into hs_qc_vac1 select * from generate_series(1,10); +INSERT 10 +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select count(*) from hs_qc_vac1; + count +------- + 10 +(1 row) +1: delete from hs_qc_vac1; +DELETE 10 +1: vacuum hs_qc_vac1; +VACUUM +-1S: select count(*) from hs_qc_vac1; +DETAIL: User query might have needed to see row versions that must be removed. +ERROR: terminating connection due to conflict with recovery +HINT: In a moment you should be able to reconnect to the database and repeat your command. +-1Sq: ... +-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + max +----- + 1 +(1 row) + +--------------------------------------------------------------------- +-- Conflict with VACUUM (buffer pin) +-- VACUUM of page that the standby is still holding buffer pin on, the difference with +-- the previous case is that here the deleted row is already invisible to the standby. +--------------------------------------------------------------------- +1: create table hs_qc_vac2(a int); +CREATE +1: insert into hs_qc_vac2 values(2); +INSERT 1 +1: delete from hs_qc_vac2; +DELETE 1 +-- run select once on the standby, so the next select will fetch data from buffer +-1S: select * from hs_qc_vac2; + a +--- +(0 rows) +-- suspend the standby at where it just unlocks the buffer but still holds the pin +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'suspend','','','hs_qc_vac2',1,1,0,dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-- we'll also make sure the startup process has sent out the signal before we let the standby backend release the pin +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'skip',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S&: select * from hs_qc_vac2; +1: vacuum hs_qc_vac2; +VACUUM +-- as mentioned before, make sure startup process has sent the signal, and then let the standby proceed +1: select gp_wait_until_triggered_fault('recovery_conflict_bufferpin_signal_sent', 1,dbid) from gp_segment_configuration where content=0 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +1: select gp_inject_fault('recovery_conflict_bufferpin_signal_sent', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('heapgetpage_after_unlock_buffer', 'reset',dbid) from gp_segment_configuration where content=0 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-- should see the conflict +-1S<: <... completed> +ERROR: canceling statement due to conflict with recovery (seg0 slice1 127.0.1.1:7005 pid=17044) +DETAIL: User was holding shared buffer pin for too long. +-1Sq: ... +-- XXX: sometimes it shows the number is 2 instead of 1. It still validates the test but it would be nice to know why. +-1S: select max(confl_bufferpin) > 0 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + ?column? 
+---------- + t +(1 row) + +--------------------------------------------------------------------- +-- Conflict with drop (temp) tablespace +-- Note: regular user tablespaces won't cause conflict on the standby since the standby cannot create any objects under them. +--------------------------------------------------------------------- +-- create tablespace +!\retcode mkdir -p @testtablespace@/hs_tablespace_directory; +(exited with code 0) +create tablespace hs_ts location '@testtablespace@/hs_tablespace_directory'; +CREATE + +-- some prepartion on the primary +create table hs_ts_foo (i int, j int) distributed by(i); +CREATE +insert into hs_ts_foo select i, i from generate_series(1,800000)i; +INSERT 800000 +analyze hs_ts_foo; +ANALYZE + +-- make sure the standby won't run too fast and delete the temp files +select gp_inject_fault('after_open_temp_file', 'suspend',dbid) from gp_segment_configuration where content=1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) + +-- on the standby, run some query that requires workfile, this example is taken +-- from regress/temp_tablespaces test +-1S: set temp_tablespaces = hs_ts; +SET +-1S: set default_tablespace = hs_ts; +SET +-1S: set statement_mem='2MB'; +SET +-1S&: with a1 as (select * from hs_ts_foo), a2 as (select * from hs_ts_foo) select a1.i xx from a1 inner join a2 on a2.i = a1.i union all select count(a1.i) from a1 inner join a2 on a2.i = a1.i order by xx limit 5; + +-- drop tablespace, should see conflict on the hot standby +drop tablespace hs_ts; +DROP +select gp_inject_fault('after_open_temp_file', 'reset',dbid) from gp_segment_configuration where content=1 and role='m'; + gp_inject_fault +----------------- + Success: +(1 row) +-1S<: <... completed> +ERROR: canceling statement due to conflict with recovery (seg1 slice3 127.0.1.1:7006 pid=990) +DETAIL: User was or might have been using tablespace that must be dropped. +-1Sq: ... + +-- conflict has been recorded. The query has multiple slices +-1S: select max(confl_tablespace) >= 1 from gp_stat_database_conflicts where datname = 'isolation2-hot-standby'; + ?column? +---------- + t +(1 row) + +-- cleanup +!\retcode rm -rf @testtablespace@/hs_tablespace_directory; +GP_IGNORE:-- start_ignore +GP_IGNORE: +GP_IGNORE:-- end_ignore +(exited with code 0) +-- Do one checkpoint. Otherwise if server restarts w/o doing checkpoint (some subsequent +-- tests might do that), the server would complain it cannot find the directory for hs_ts. +checkpoint; +CHECKPOINT + +---------------------------------------------------------------- +-- Additional case to show that distributed transaction is not taken into +-- account w/o the help of restore-point-based distributed snapshot creation. +---------------------------------------------------------------- + +1: create table hs_qc_ds1(a int); +CREATE +1: insert into hs_qc_ds1 select * from generate_series(1,10); +INSERT 10 +-- standby starts a repeatable read transaction, runs a local query that +-- creates a distributed snapshot w/o creating QE. +-1S: select count(*) from hs_qc_ds1; + count +------- + 10 +(1 row) +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select relname from pg_class where relname = 'hs_qc_ds1'; + relname +----------- + hs_qc_ds1 +(1 row) +-- primary runs VACUUM +1: delete from hs_qc_ds1; +DELETE 10 +1: vacuum hs_qc_ds1; +VACUUM +-- The standby query in theory should be cancelled, because it started before +-- the VACUUM. 
But in reality, it doesn't, and sees 0 rows, because the QE for the +-- SELECT below will create more recent local snapshot that does not conflict with +-- the VACUUM, and sees the result of DELETE+VACUUM. +-- Note: with the help of restore point, we would be able to create local snapshot +-- precisely corresponding to each distributed snapshot, and do conflict detection accordingly. +-1S: select count(*) from hs_qc_ds1; + count +------- + 0 +(1 row) +-1S: end; +END + +---------------------------------------------------------------- +-- Test GUC hot_standby_feedback +---------------------------------------------------------------- +!\retcode gpconfig -c hot_standby_feedback -v on; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +1: create table hs_qc_guc1(a int); +CREATE +1: insert into hs_qc_guc1 select * from generate_series(1,10); +INSERT 10 + +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select * from hs_qc_guc1; + a +---- + 1 + 10 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) + +-- VACUUM won't cleanup this table since the standby still sees it +1: delete from hs_qc_guc1; +DELETE 10 +1: vacuum hs_qc_guc1; +VACUUM + +-- hot standby can still see those rows +-1S: select * from hs_qc_guc1; + a +---- + 1 + 10 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(10 rows) + +-- after the conflicting read transaction ends, the next VACUUM will successfully vacuum the table +-1S: end; +END +1: vacuum hs_qc_guc1; +VACUUM +-1S: select * from hs_qc_guc1; + a +--- +(0 rows) +-1Sq: ... + +!\retcode gpconfig -r hot_standby_feedback; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +---------------------------------------------------------------- +-- Test GUC vacuum_defer_cleanup_age +---------------------------------------------------------------- +-- Use a GUC value that's not 0, so VACUUM does not clean up +-- recent dead rows that the hot standby might be still seeing. +!\retcode gpconfig -c vacuum_defer_cleanup_age -v 1; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +1: create table hs_qc_guc2(a int); +CREATE +1: insert into hs_qc_guc2 select * from generate_series(1,10); +INSERT 10 + +-1S: begin transaction isolation level repeatable read; +BEGIN +-1S: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- VACUUM won't cleanup this table since the DELETE is still within vacuum_defer_cleanup_age +1: delete from hs_qc_guc2; +DELETE 10 +1: vacuum hs_qc_guc2; +VACUUM + +-- showing all rows are deleted but not vacuumed +1: select count(*) from hs_qc_guc2; + count +------- + 0 +(1 row) +1: set gp_select_invisible to on; +SET +1: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- hot standby can still query the table +-1S: select count(*) from hs_qc_guc2; + count +------- + 10 +(1 row) + +-- only if the age is reached, hot standby will see the same conflict as before +1: create temp table tt1(a int); +CREATE +1: vacuum hs_qc_guc2; +VACUUM +-1S: select count(*) from hs_qc_guc2; +ERROR: terminating connection due to conflict with recovery (seg0 slice1 127.0.1.1:7005 pid=18713) +DETAIL: User query might have needed to see row versions that must be removed. +HINT: In a moment you should be able to reconnect to the database and repeat your command. +-1Sq: ... 
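An aside on the two GUC cases above before the test's own counter check continues below: with hot_standby_feedback on, the standby reports its oldest snapshot xmin back to the primary, and that is what keeps VACUUM from removing the rows; vacuum_defer_cleanup_age instead holds cleanup back by a fixed number of transactions. A sketch of how one could observe the feedback on the primary (standard view, shown only for orientation):

-- on the primary: the xmin reported by the standby, which holds back VACUUM cleanup
select application_name, state, backend_xmin
from pg_stat_replication;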
+-1S: select max(confl_snapshot) from gp_stat_database_conflicts where datname = 'isolation2-hot-standby';
+ max
+-----
+ 2
+(1 row)
+
+!\retcode gpconfig -r vacuum_defer_cleanup_age;
+(exited with code 0)
+!\retcode gpstop -u;
+(exited with code 0)
+
diff --git a/src/test/isolation2/sql/.gitignore b/src/test/isolation2/sql/.gitignore
index 361b986e18d..bfc3709082c 100644
--- a/src/test/isolation2/sql/.gitignore
+++ b/src/test/isolation2/sql/.gitignore
@@ -7,6 +7,7 @@
 /pt_io_in_progress_deadlock.sql
 /distributed_snapshot.sql
 /local_directory_table_mixed.sql
+/hot_standby/query_conflict.sql

 # ignores including sub-directories
 autovacuum-analyze.sql
diff --git a/src/test/isolation2/sql/hot_standby/basic.sql b/src/test/isolation2/sql/hot_standby/basic.sql
new file mode 100644
index 00000000000..a900b38a29c
--- /dev/null
+++ b/src/test/isolation2/sql/hot_standby/basic.sql
@@ -0,0 +1,95 @@
+-- Tests for basic query dispatch on a hot standby.
+
+-- hot_standby must show 'on' and synchronous_commit must be remote_apply for the tests to make sense
+-1S: show hot_standby;
+-1S: show synchronous_commit;
+
+-- will be checking if QD/QE info looks good
+-1S: select id, type, content, port from gp_backend_info();
+
+----------------------------------------------------------------
+-- Test: basic query dispatch
+----------------------------------------------------------------
+create table hs_t1(a int);
+create table hs_t2(a int);
+
+-- standby should see the results for 2pc immediately.
+insert into hs_t1 select * from generate_series(1,10);
+-1S: select * from hs_t1;
+-- standby won't see results for the last 1pc immediately because the standby QD
+-- isn't aware of it so its distributed snapshot doesn't include the 1pc, but
+-- as long as another 2pc comes it will be able to see the previous 1pc. We
+-- tolerate this case in the mirrored cluster setup.
+insert into hs_t2 values(1);
+-1S: select * from hs_t2;
+-- any following 2pc will make the 1pc visible
+create temp table tt(a int);
+-1S: select * from hs_t2;
+
+-- we have three QEs launched on the mirror segments.
+-- note that the first QE on a segment is still a "writer" because we
+-- need it to manage locks, same as read-only queries on a primary QD.
+-1S: select id, type, content, port from gp_backend_info();
+
+-- should have parallel readers launched
+-1S: select * from hs_t1 join (select * from hs_t2) hs_t2 on (hs_t1 = hs_t2);
+-1S: select id, type, content, port from gp_backend_info();
+
+-- now a singleton reader added too
+-1S: select * from hs_t1 join (select oid::int from pg_class) hs_t2 on (hs_t1 = hs_t2);
+-1S: select id, type, content, port from gp_backend_info();
+
+-- un-committed result should not be seen by the standby
+begin;
+insert into hs_t1 select * from generate_series(11,20);
+
+-- standby should only see 1...10
+-1S: select * from hs_t1;
+
+end;
+
+-- standby should see 1...20 now
+-1S: select * from hs_t1;
+
+----------------------------------------------------------------
+-- Test: other things that a hot standby can do.
+--
+-- For more, refer to the regress test 'hs_standby_allowed'.
+---------------------------------------------------------------- +-- set/reset and show GUC +-1S: set optimizer = on; +-1S: show optimizer; +-1S: reset optimizer; +-- copy command +-1S: copy hs_t1 to '/tmp/hs_copyto.csv' csv null ''; +-- query catalogs +-1S: select count(*) from pg_class where relname = 'hs_t1'; +-1S: select dbid,content,role,preferred_role,mode,status from gp_segment_configuration where dbid = current_setting('gp_dbid')::integer; +-- checkpoint is allowed on standby but a restart point is created instead +-1S: checkpoint; + +---------------------------------------------------------------- +-- Test: things that can't be done on a hot standby: +-- no DML, DDL or anything that generates WAL. +-- +-- More refer to regress test 'hs_standby_disallowed'. +---------------------------------------------------------------- +-1S: insert into hs_t1 values(1); +-1S: delete from hs_t1; +-1S: update hs_t1 set a = 0; +-1S: create table hs_t2(a int); +-1S: create database hs_db; +-1S: vacuum hs_t1; + +-- +-- No hintbit WAL generation in SELECT. +-- +create table hs_nohintbit(a int) distributed by (a); +insert into hs_nohintbit select generate_series (1, 10); +-- flush the data to disk +checkpoint; + +-1S: set gp_disable_tuple_hints=off; +-- no WAL is being generated (otherwise an error would occur "cannot make new WAL entries during recovery") +-1S: SELECT count(*) FROM hs_nohintbit; + diff --git a/src/test/isolation2/sql/hot_standby/faults.sql b/src/test/isolation2/sql/hot_standby/faults.sql new file mode 100644 index 00000000000..6e25bcba272 --- /dev/null +++ b/src/test/isolation2/sql/hot_standby/faults.sql @@ -0,0 +1,125 @@ +-- Test system faults scenarios + +-- start_matchsubs +-- +-- m/Is the server running on host.*/ +-- s/Is the server running on host "\d+.\d+.\d+.\d+" and accepting/Is the server running on host and accepting/ +-- m/(seg\d+ \d+.\d+.\d+.\d+:\d+)/ +-- s/(.*)/(seg IP:PORT)/ +-- m/ERROR: connection to dbid 1 .*:7000 failed .*/ +-- s/ERROR: connection to dbid 1 .*:7000 failed .*/ERROR: connection to dbid 1 :7000 failed/ +-- +-- end_matchsubs + +-- Let FTS detect/declare failure sooner +!\retcode gpconfig -c gp_fts_probe_interval -v 10 --coordinatoronly; +!\retcode gpstop -u; + +create table hs_failover(a int); +insert into hs_failover select * from generate_series(1,10); +-1S: select * from hs_failover; + +---------------------------------------------------------------- +-- Mirror segment fails +---------------------------------------------------------------- +select pg_ctl(datadir, 'stop', 'immediate') from gp_segment_configuration where content=1 and role = 'm'; + +-- make sure mirror is detected down +create temp table hs_tt(a int); +select gp_request_fts_probe_scan(); + +-- will not succeed +-1S: select * from hs_failover; +-1Sq: + +-- recovery +!\retcode gprecoverseg -aF; + +-- sync-up +select wait_until_all_segments_synchronized(); + +-- works now +-1S: select * from hs_failover; + +---------------------------------------------------------------- +-- Primary segment fails +---------------------------------------------------------------- +-- inject a fault where the mirror gets out of recovery +select gp_inject_fault('out_of_recovery_in_startupxlog', 'skip', dbid) from gp_segment_configuration where content = 1 and role = 'm'; + +select pg_ctl(datadir, 'stop', 'immediate') from gp_segment_configuration where content=1 and role = 'p'; +select gp_request_fts_probe_scan(); + +-- make sure failover happens +select dbid, content, role, preferred_role, mode, status from 
gp_segment_configuration where content = 1; +select gp_wait_until_triggered_fault('out_of_recovery_in_startupxlog', 1, dbid) from gp_segment_configuration where content = 1 and role = 'p'; +select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_segment_configuration where content = 1 and role = 'p'; + +-- On an existing standby connection, query will run but it is dispatched to the previous mirror +-- in an existing gang. That mirror is now a primary, so it will complain and the query fails. +-1S: select * from hs_failover; +-1Sq: + +-- will fail due to downed mirror (previous primary) +-1S: select * from hs_failover; +-1Sq: + +-- bring the downed mirror up +!\retcode gprecoverseg -aF; +select wait_until_all_segments_synchronized(); + +-- mirror is up +-1S: select dbid, content, role, preferred_role, mode, status from gp_segment_configuration where content = 1; + +-- now the query will succeed +-1S: select * from hs_failover; +-1Sq: + +-- re-balance, bring the segments to their preferred roles +!\retcode gprecoverseg -ar; +select wait_until_all_segments_synchronized(); +-1S: select dbid, content, role, preferred_role, mode, status from gp_segment_configuration where content = 1; + +-- query runs fine still +-1S: select * from hs_failover; + +---------------------------------------------------------------- +-- DTX recovery +---------------------------------------------------------------- +-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; + +1: create table tt_hs_dtx(a int); + +-- inject fault to repeatedly fail the COMMIT PREPARE phase of 2PC, which ensures that the dtx cannot finish even by the dtx recovery process. 
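(The fault-injection command for this follows right below.) As background, a dtx stuck in this window is an ordinary in-doubt prepared transaction on the affected segment; a minimal sketch of how to eyeball it by hand, assuming a direct utility-mode connection to that segment:

-- run while connected directly to the affected segment (utility mode);
-- a dtx stuck between PREPARE and COMMIT PREPARED shows up as an in-doubt
-- prepared transaction there:
select gid, prepared, owner, database from pg_prepared_xacts;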
+select gp_inject_fault_infinite('finish_commit_prepared', 'error', dbid) from gp_segment_configuration where content=1 and role='p'; + +-- session 1 on primary QD tries to commit a DTX, but cannot finish due to the fault on a QE +1&: insert into tt_hs_dtx select * from generate_series(1,10); + +-- inject a panic on primary QD, essentially restarts the primary QD +2: select gp_inject_fault('before_read_command', 'panic', dbid) from gp_segment_configuration where content=-1 and role='p'; +2: select 1; + +1<: +1q: +2q: + +-- standby QD can still run query +-1S: select * from hs_failover; +-- it cannot see rows from the in-doubt DTX +-1S: select * from tt_hs_dtx; + +-- let the failed dtx be recovered, also make sure the standby replays the forget record which signals the completion of the dtx +-1S: select gp_inject_fault('redoDistributedForgetCommitRecord', 'skip', dbid) from gp_segment_configuration where content=-1 and role='m'; +-1S: select gp_inject_fault_infinite('finish_commit_prepared', 'reset', dbid) from gp_segment_configuration where content=1 and role='p'; +-1S: select gp_wait_until_triggered_fault('redoDistributedForgetCommitRecord', 1, dbid) from gp_segment_configuration where content=-1 and role='m'; +-1S: select gp_inject_fault('redoDistributedForgetCommitRecord', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m'; + +-- standby should see the rows from the in-doubt DTX now +-1S: select * from tt_hs_dtx; + +-1S: select wait_until_all_segments_synchronized(); +1: select gp_inject_fault('before_read_command', 'reset', dbid) from gp_segment_configuration where content=-1 and role='p'; +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + diff --git a/src/test/isolation2/sql/hot_standby/setup.sql b/src/test/isolation2/sql/hot_standby/setup.sql new file mode 100644 index 00000000000..aa15f468b7d --- /dev/null +++ b/src/test/isolation2/sql/hot_standby/setup.sql @@ -0,0 +1,9 @@ +-- setup for hot standby tests +!\retcode gpconfig -c hot_standby -v on; +-- let primary wait for standby to apply changes, make test less flaky +!\retcode gpconfig -c synchronous_commit -v remote_apply; +-- make it faster to handle query conflict +!\retcode gpconfig -c max_standby_streaming_delay -v 1000; +-- disable autovacuum, to not affect the manual VACUUM in the tests +!\retcode gpconfig -c autovacuum -v off; +!\retcode gpstop -ar; diff --git a/src/test/isolation2/sql/hot_standby/teardown.sql b/src/test/isolation2/sql/hot_standby/teardown.sql new file mode 100644 index 00000000000..af6fba50aed --- /dev/null +++ b/src/test/isolation2/sql/hot_standby/teardown.sql @@ -0,0 +1,5 @@ +-- reset the setup for hot standby tests +!\retcode gpconfig -r hot_standby; +!\retcode gpconfig -r synchronous_commit; +!\retcode gpconfig -r max_standby_streaming_delay; +!\retcode gpstop -ar; diff --git a/src/test/isolation2/sql/hot_standby/transaction_isolation.sql b/src/test/isolation2/sql/hot_standby/transaction_isolation.sql new file mode 100644 index 00000000000..68945228313 --- /dev/null +++ b/src/test/isolation2/sql/hot_standby/transaction_isolation.sql @@ -0,0 +1,319 @@ +---------------------------------------------------------------- +-- Test transaction isolation in general, not specific to dtx +---------------------------------------------------------------- +1: create table hs_tx(a int); +1: insert into hs_tx select * from generate_series(1,10); + +1: begin; +1: insert into hs_tx select * from generate_series(11,20); +2: begin; 
+2: insert into hs_tx select * from generate_series(21,30); +2: abort; + +-- standby should only see completed transactions, not in-progress transactions, nor aborted transactions +-1S: select * from hs_tx; + +1: end; +-1S: select * from hs_tx; + +---------------------------------------------------------------- +-- Test isolation between hot standby query and in-progress dtx +---------------------------------------------------------------- + +1: create table hs_dtx1(a int); +1: create table hs_dtx2(a int); + +-- inject two suspend faults: +-- 1. on seg0, suspend before PREPARE phase of 2PC +1: select gp_inject_fault('qe_start_prepared', 'suspend',dbid) from gp_segment_configuration where content=0 and role='p'; +1&: insert into hs_dtx1 select * from generate_series(1,10); +-- 2. on seg1, suspend before COMMIT phase of 2PC +2: select gp_inject_fault('qe_start_commit_prepared', 'suspend',dbid) from gp_segment_configuration where content=1 and role='p'; +2&: insert into hs_dtx2 select * from generate_series(1,10); + +-- standby should not see any rows from either dtx +-1S: select * from hs_dtx1; +-1S: select * from hs_dtx2; + +-- reset +3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; +3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; +1<: +2<: + +-- standby should see the results from the dtx now +-1S: select * from hs_dtx1; +-1S: select * from hs_dtx2; + +---------------------------------------------------------------- +-- Test DTX abort that happens in different phases +---------------------------------------------------------------- + +1: create table hs_abort_dtx1(a int); +1: create table hs_abort_dtx2(a int); + +-- inject two errors: +-- 1. on seg0, error out before PREPARE phase of 2PC +1: select gp_inject_fault('qe_start_prepared', 'error', dbid) from gp_segment_configuration where content=0 and role='p'; +1: insert into hs_abort_dtx1 select * from generate_series(1,10); +1: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; +-- 2. 
on seg1, error out before COMMIT phase of 2PC +1: select gp_inject_fault('qe_start_commit_prepared', 'error', dbid) from gp_segment_configuration where content=1 and role='p'; +1: insert into hs_abort_dtx2 select * from generate_series(1,10); +1: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; + +-- standby should not see dtx1 which is aborted but should see dtx2 which is recovered +-1S: select * from hs_abort_dtx1; +-1S: select * from hs_abort_dtx2; + +---------------------------------------------------------------- +-- Test isolation between hot standby query and in-progress dtx, +-- but also run more queries in between +---------------------------------------------------------------- +1: create table hs_dtx3(a int); + +-- inject faults to suspend segments in 2PC +1: select gp_inject_fault('qe_start_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p'; +1&: insert into hs_dtx3 select * from generate_series(1,10); +2: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=1 and role='p'; +2&: insert into hs_dtx3 select * from generate_series(11,20); + +-- standby should not see rows in the in-progress dtx +-1S: select * from hs_dtx3; + +-- now run some dtx and completed +3: insert into hs_dtx3 values(99); +3: create table hs_dtx4(a int); +3: insert into hs_dtx4 select * from generate_series(1,10); + +-- standby should still not see rows in the in-progress DTX, but should see the completed ones +-1S: select * from hs_dtx3; +-1S: select * from hs_dtx4; + +3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p'; +3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p'; +1<: +2<: + +-- standby should see all rows now +-1S: select * from hs_dtx3; + +---------------------------------------------------------------- +-- Test isolation between standby QD and in-progress dtx, +-- but after standby QD resets and gets running DTX from checkpoint. +---------------------------------------------------------------- +1: create table hs_t5(a int, b text); +1: create table hs_t6(a int, b text); + +-- inject fault to suspend a primary right before it conducts the commit phase of 2PC, +-- so in the subsequent INSERT, all local transactions will be committed but the dtx is not. 
+1: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p';
+1&: insert into hs_t5 select i, 'in-progress' from generate_series(1,10) i;
+
+-- now run some dtxes to completion, and have the primary conduct a checkpoint
+2: insert into hs_t5 values(1, 'commited');
+2: insert into hs_t6 select i, 'committed' from generate_series(1,10) i;
+2: begin;
+2: insert into hs_t5 values(99, 'aborted');
+2: abort;
+2: checkpoint;
+
+-- now make the standby QD reset itself
+-1S: select gp_inject_fault('exec_simple_query_start', 'panic', dbid) from gp_segment_configuration where content=-1 and role='m';
+-1S: select 1;
+-1Sq:
+
+-- standby should still not see rows in the in-progress DTX, but should see the completed ones
+-1S: select * from hs_t5;
+-1S: select * from hs_t6;
+
+2: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p';
+1<:
+
+-- standby should see all rows now
+-1S: select * from hs_t5;
+-1S: select * from hs_t6;
+
+-- standby should correctly see more in-progress dtx on the primary.
+-- context: previously this would fail because the standby updates latestCompletedGxid to the
+-- bumped nextGxid from checkpoint, which is too far (so that it thinks the new dtx already completed).
+1: select gp_inject_fault('qe_start_prepared', 'suspend', dbid) from gp_segment_configuration where content=0 and role='p';
+1&: delete from hs_t5;
+2: select gp_inject_fault('qe_start_commit_prepared', 'suspend', dbid) from gp_segment_configuration where content=1 and role='p';
+2&: delete from hs_t6;
+
+-- standby should not see the effect of the deletes
+-1S: select * from hs_t5;
+-1S: select * from hs_t6;
+
+3: select gp_inject_fault('qe_start_prepared', 'reset',dbid) from gp_segment_configuration where content=0 and role='p';
+3: select gp_inject_fault('qe_start_commit_prepared', 'reset',dbid) from gp_segment_configuration where content=1 and role='p';
+
+1<:
+2<:
+
+-- standby now sees those deletes
+-1S: select * from hs_t5;
+-1S: select * from hs_t6;
+
+----------------------------------------------------------------
+-- Read-committed isolation: query on hot standby should not see dtx that completed after it
+-- created distributed snapshot, but should see dtx that completed before that.
+----------------------------------------------------------------
+
+1: create table hs_rc(a int);
+1: insert into hs_rc select * from generate_series(1,10);
+
+-- case 1: suspend SELECT on the standby QD right after it created snapshot
+-1S: select gp_inject_fault('select_after_qd_create_snapshot', 'suspend', dbid) from gp_segment_configuration where content=-1 and role='m';
+-1S&: select * from hs_rc;
+
+-- new INSERT or DELETE won't be observed by the standby
+1: insert into hs_rc select * from generate_series(11,20);
+1: delete from hs_rc where a < 5;
+1: select gp_inject_fault('select_after_qd_create_snapshot', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m';
+
+-- should only see the rows at the time when SELECT started (1...10).
+-1S<: + +-- SELECT again, should see the effect from the INSERT and DELETE now +-1S: select * from hs_rc; + +-- case 2: suspend SELECT on the standby QD before creating snapshot +-1S: select gp_inject_fault('select_before_qd_create_snapshot', 'suspend', dbid) from gp_segment_configuration where content=-1 and role='m'; +-1S&: select * from hs_rc; + +1: insert into hs_rc select * from generate_series(21,30); +1: delete from hs_rc where a < 21; +1: select gp_inject_fault('select_before_qd_create_snapshot', 'reset', dbid) from gp_segment_configuration where content=-1 and role='m'; + +-- standby should see the effect of the INSERT and DELETE +-1S<: + +---------------------------------------------------------------- +-- Read-committed isolation in the BEGIN...END block +---------------------------------------------------------------- + +1: truncate hs_rc; +1: insert into hs_rc select * from generate_series(1,30); + +-1S: begin; +-1S: select count(*) from hs_rc; + +-- have some concurrent sessions on primary QD: +-- 1. a completed transaction +1: delete from hs_rc where a <= 10; +-- 3. an aborted transaction +2: begin; +2: delete from hs_rc where a > 10 and a <= 20; +2: abort; +-- 3. an ongoing transaction +3: begin; +3: delete from hs_rc where a > 20 and a <= 30; + +-- the standby should see results accordingly +-1S: select * from hs_rc; +-1S: end; + +3: end; +-1S: select * from hs_rc; + +---------------------------------------------------------------- +-- Repeatable-read isolation: distributed snapshot is created at time of the +-- first query in transaction block. All queries in the transaction block +-- should only see results committed before the distributed snapshot creation. +---------------------------------------------------------------- + +1: create table hs_rr(a int); +1: insert into hs_rr select * from generate_series(1,10); + +-1S: begin isolation level repeatable read; +-- should see 10 +-1S: select count(*) from hs_rr; + +-- do some more INSERT, DELETE and UPDATE +1: insert into hs_rr select * from generate_series(11,20); +1: delete from hs_rr where a <= 10; +1: update hs_rr set a = a + 100; + +-- should still the initial rows {1...10} +-1S: select * from hs_rr; +-1S: end; + +-- should see the results from the INSERT, DELETE and UPDATE +-1S: begin isolation level repeatable read; +-1S: select * from hs_rr; + +-- standby won't see ongoing or aborted transactions either +1: begin; +1: insert into hs_rr select * from generate_series(1,10); +2: begin; +2: insert into hs_rr select * from generate_series(1,10); +2: abort; + +-1S: select * from hs_rr; + +1: end; +-1S: end; + +---------------------------------------------------------------- +-- Transaction isolation is respected in subtransactions too +---------------------------------------------------------------- + +1: create table hs_subtrx(a int); + +-- (1) read-committed +-1S: begin; +-1S: select count(*) from hs_subtrx; +-1S: savepoint s1; + +1: insert into hs_subtrx select * from generate_series(1,10); + +-1S: select count(*) from hs_subtrx; +-1S: savepoint s2; +-1S: select count(*) from hs_subtrx; +-1S: rollback to savepoint s1; +-1S: select count(*) from hs_subtrx; +-1S: end; + +-- (2) repeatable-read +-1S: begin isolation level repeatable read; +-1S: select * from hs_subtrx; +-1S: savepoint s1; + +1: insert into hs_subtrx select * from generate_series(11,20); +1: delete from hs_subtrx where a <= 10; +1: update hs_subtrx set a = a + 100; + +-1S: select * from hs_subtrx; +-1S: savepoint s2; +-1S: select * from hs_subtrx; +-1S: 
rollback to savepoint s1; +-1S: select * from hs_subtrx; +-1S: end; +-1S: select * from hs_subtrx; + +---------------------------------------------------------------- +-- Various isolation tests that involve AO/CO table. +---------------------------------------------------------------- +1: create table hs_ao(a int, id int unique) using ao_row; +1: insert into hs_ao select 1,i from generate_series(1,10) i; +1: begin; +1: insert into hs_ao select 2,i from generate_series(11,20) i; + +-- standby sees the same AO metadata as primary +2: select * from gp_toolkit.__gp_aoseg('hs_ao'); +-1S: select * from gp_toolkit.__gp_aoseg('hs_ao'); +2: select (gp_toolkit.__gp_aoblkdir('hs_ao')).* from gp_dist_random('gp_id'); +-1S: select (gp_toolkit.__gp_aoblkdir('hs_ao')).* from gp_dist_random('gp_id'); + +-- standby sees correct table data +-1S: select * from hs_ao; + +-- standby sees the effect of vacuum +1: end; +1: delete from hs_ao where a = 1; +1: vacuum hs_ao; +1: select * from gp_toolkit.__gp_aoseg('hs_ao'); +-1S: select * from gp_toolkit.__gp_aoseg('hs_ao'); +-1S: select * from hs_ao; diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 262e4e74fbe..9e6d4c653b9 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -603,6 +603,50 @@ sub append_conf =pod +=item $node->adjust_conf(filename, setting, value, skip_equals) + +Modify the named config file setting with the value. If the value is undefined, +instead delete the setting. If the setting is not present no action is taken. + +This will write "$setting = $value\n" in place of the existing line, +unless skip_equals is true, in which case it will write +"$setting $value\n". If the value needs to be quoted it is the caller's +responsibility to do that. + +=cut + +sub adjust_conf +{ + my ($self, $filename, $setting, $value, $skip_equals) = @_; + + my $conffile = $self->data_dir . '/' . $filename; + + my $contents = PostgreSQL::Test::Utils::slurp_file($conffile); + my @lines = split(/\n/, $contents); + my @result; + my $eq = $skip_equals ? '' : '= '; + foreach my $line (@lines) + { + if ($line !~ /^$setting\W/) + { + push(@result, "$line\n"); + } + elsif (defined $value) + { + push(@result, "$setting $eq$value\n"); + } + } + open my $fh, ">", $conffile + or croak "could not write \"$conffile\": $!"; + print $fh @result; + close $fh; + + chmod($self->group_access() ? 0640 : 0600, $conffile) + or die("unable to set permissions for $conffile"); +} + +=pod + =item $node->backup(backup_name) Create a hot backup with B in subdirectory B of diff --git a/src/test/recovery/t/101_restore_point_and_startup_pause.pl b/src/test/recovery/t/101_restore_point_and_startup_pause.pl index cda572524c1..f59acffb7ad 100644 --- a/src/test/recovery/t/101_restore_point_and_startup_pause.pl +++ b/src/test/recovery/t/101_restore_point_and_startup_pause.pl @@ -1,48 +1,122 @@ -# test for pausing on startup and on a specified restore point +# Test for pausing and resuming recovery at specific restore points, +# both at initial startup and in a continuous fashion by advancing +# gp_pause_on_restore_point_replay. 
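Before the code, a rough SQL-level sketch of one pause/resume cycle that this test drives through safe_psql (restore point name 'rp1' as in the test; gp_pause_on_restore_point_replay is the GUC being exercised):

-- primary: mark a named restore point and finish the WAL segment so it reaches the standby
SELECT pg_create_restore_point('rp1');
SELECT pg_switch_wal();

-- standby, with gp_pause_on_restore_point_replay = 'rp1' set and the config reloaded:
SELECT pg_last_wal_replay_lsn();   -- advances up to the restore point, then stops
SELECT pg_is_wal_replay_paused();  -- t once 'rp1' has been replayed
SELECT pg_wal_replay_resume();     -- continue replay toward the next configured point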
+ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 1; +use Test::More tests => 12; use File::Copy; -# Initialize primary node with WAL archiving setup +# Initialize and start primary node my $node_primary = get_new_node('primary'); -$node_primary->init( - has_archiving => 1, - allows_streaming => 1); -$node_primary->append_conf('postgresql.conf', "wal_level = 'replica'"); -$node_primary->append_conf('postgresql.conf', "max_wal_senders = 10"); -my $backup_name = 'my_backup'; - -# Start primary +$node_primary->init(has_archiving => 1, allows_streaming => 1); $node_primary->start; -# Initialize standby node from backup, fetching WAL from archives -$node_primary->backup($backup_name); -my $node_standby = get_new_node('standby'); -$node_standby->init_from_backup($node_primary, $backup_name, - has_restoring => 1); -$node_standby->append_conf('postgresql.conf', "gp_pause_on_restore_point_replay = on"); +my $node_standby = get_new_node("standby"); + +sub test_pause_in_recovery +{ + my ($restore_point, $test_lsn, $num_rows) = @_; + + # Wait until standby has replayed enough data + my $caughtup_query = "SELECT pg_last_wal_replay_lsn() = '$test_lsn'::pg_lsn"; + $node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + + # Check data has been replayed + my $result = $node_standby->safe_psql('postgres', "SELECT count(*) FROM table_foo;"); + is($result, $num_rows, "check standby content for $restore_point"); + ok($node_standby->safe_psql('postgres', 'SELECT pg_is_wal_replay_paused();') eq 't', + "standby is paused in recovery on $restore_point"); +} + +# Create data before taking the backup +$node_primary->safe_psql('postgres', "CREATE TABLE table_foo AS SELECT generate_series(1,1000);"); +# Take backup from which all operations will be run +$node_primary->backup('my_backup'); +my $lsn0 = $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp0');"); +# Switching WAL guarantees that the restore point is available to the standby +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); + +# Add more data, create restore points and switch wal to guarantee +# that the restore point is available to the standby + +# rp1 +$node_primary->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(1001,2000))"); +my $lsn1 = $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp1');"); +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); + +# rp2 +$node_primary->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(2001, 3000))"); +my $lsn2 = $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp2');"); +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); + +# rp3 +$node_primary->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(3001, 4000))"); +$node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp3');"); +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); -# Start standby +# rp4 +$node_primary->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(4001, 5000))"); +my $lsn4 = $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp4');"); +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); + +# rp5 +$node_primary->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(5001, 6000))"); +$node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp5');"); 
+$node_primary->safe_psql('postgres', "SELECT pg_switch_wal();"); + +# Restore the backup +$node_standby->init_from_backup($node_primary, 'my_backup', has_restoring => 1); +# Enable `hot_standby` +$node_standby->append_conf('postgresql.conf', qq(hot_standby = 'on')); + +# Set rp0 as a restore point to pause on start up +$node_standby->append_conf('postgresql.conf', qq(gp_pause_on_restore_point_replay = 'rp0')); +# Start the standby $node_standby->start; +test_pause_in_recovery('rp0', $lsn0, 1000); + +# Advance to rp1 +$node_standby->adjust_conf('postgresql.conf', 'gp_pause_on_restore_point_replay', "rp1"); +$node_standby->reload; +$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume();"); +test_pause_in_recovery('rp1', $lsn1, 2000); + +# Advance to rp2 +$node_standby->adjust_conf('postgresql.conf', 'gp_pause_on_restore_point_replay', "rp2"); +$node_standby->reload; +$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume();"); +test_pause_in_recovery('rp2', $lsn2, 3000); + +# Verify that a restart will bring us back to rp2 +$node_standby->restart; +test_pause_in_recovery('rp2', $lsn2, 3000); + +# Skip rp3 and advance to rp4 +$node_standby->adjust_conf('postgresql.conf', 'gp_pause_on_restore_point_replay', "rp4"); +$node_standby->reload; +$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume();"); +test_pause_in_recovery('rp4', $lsn4, 5000); + +# Do not advance to rp5; signal promote and then resume recovery +$node_standby->safe_psql('postgres', "SELECT pg_promote(false);"); +$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume();"); -# Create a restore point on the primary -my $restore_point_lsn = - $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('rp')"); +# Wait for standby to promote +$node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery();") + or die "Timed out while waiting for standby to exit recovery"; -# Force archival of WAL file to make it present on standby -$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); +# Check that we promoted with rp4's table count and not rp5's +my $result = $node_standby->safe_psql('postgres', "SELECT count(*) FROM table_foo;"); +is($result, 5000, "check standby content after promotion"); -# Wait until enough replay has been done on the standby before checking if replay -# is paused at the restore point -my $caughtup_query = - "SELECT '$restore_point_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; -$node_standby->poll_query_until('postgres', $caughtup_query) - or die "Timed out while waiting for standby to catch up"; +# Make sure the former standby is now writable +$node_standby->safe_psql('postgres', "INSERT INTO table_foo VALUES (generate_series(6001, 7000));"); +$result = $node_standby->safe_psql('postgres', "SELECT count(*) FROM table_foo;"); +is($result, 6000, "check standby is writable after promotion"); -my $paused_at_restore_point_query = - "SELECT pg_is_wal_replay_paused() and pg_last_wal_replay_lsn() = '$restore_point_lsn'::pg_lsn"; -my $result2 = $node_standby->safe_psql('postgres', $paused_at_restore_point_query); -is($result2, qq(t), 'check if WAL replay is paused at restore point'); +$node_primary->teardown_node; +$node_standby->teardown_node; diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index dffdba2572a..8271d5fb171 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -239,6 +239,16 @@ endif standbycheck: all $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/standby_schedule 
$(EXTRA_TESTS) +# GPDB: installcheck for hot standby. This is essentially same as the upstream 'standbycheck' +# above but we just make sure that we do the primary preparation and use the desired standby port. +# If no standby port is given, just use the demo cluster's standby port 7001. +ifeq ($(STANDBY_PGPORT),) + STANDBY_PGPORT = 7001 +endif +installcheck-hot-standby: all + $(pg_regress_installcheck) $(REGRESS_OPTS) hs_primary_setup + $(pg_regress_installcheck) $(REGRESS_OPTS) --port=$(STANDBY_PGPORT) --use-existing --schedule=$(srcdir)/standby_schedule $(EXTRA_TESTS) + # old interfaces follow... runcheck: check diff --git a/src/test/regress/expected/hs_primary_setup.out b/src/test/regress/expected/hs_primary_setup.out new file mode 100644 index 00000000000..0184b2b73e9 --- /dev/null +++ b/src/test/regress/expected/hs_primary_setup.out @@ -0,0 +1,19 @@ +-- +-- Hot Standby tests +-- +-- hs_primary_setup.sql +-- +drop table if exists hs1; +create table hs1 (col1 integer primary key); +insert into hs1 values (1); +drop table if exists hs2; +create table hs2 (col1 integer primary key); +insert into hs2 values (12); +insert into hs2 values (13); +drop table if exists hs3; +create table hs3 (col1 integer primary key); +insert into hs3 values (113); +insert into hs3 values (114); +insert into hs3 values (115); +DROP sequence if exists hsseq; +create sequence hsseq; diff --git a/src/test/regress/expected/hs_standby_allowed.out b/src/test/regress/expected/hs_standby_allowed.out index 00b8faf9eb6..e6b6514642f 100644 --- a/src/test/regress/expected/hs_standby_allowed.out +++ b/src/test/regress/expected/hs_standby_allowed.out @@ -164,31 +164,25 @@ show synchronous_commit; reset synchronous_commit; discard temp; discard all; +NOTICE: command without clusterwide effect +HINT: Consider alternatives as DEALLOCATE ALL, or DISCARD TEMP if a clusterwide effect is desired. -- CURSOR commands BEGIN; -DECLARE hsc CURSOR FOR select * from hs3; +DECLARE hsc CURSOR FOR select * from hs3 order by col1 asc; FETCH next from hsc; col1 ------ 113 (1 row) -fetch first from hsc; - col1 ------- - 113 -(1 row) - -fetch last from hsc; - col1 ------- - 115 -(1 row) - +-- GPDB: backward fetch isn't allowed, moved to hs_standby_disallowed +-- fetch first from hsc; +-- fetch last from hsc; fetch 1 from hsc; col1 ------ -(0 rows) + 114 +(1 row) CLOSE hsc; COMMIT; @@ -216,3 +210,5 @@ UNLISTEN *; -- ALLOWED COMMANDS CHECKPOINT; discard all; +NOTICE: command without clusterwide effect +HINT: Consider alternatives as DEALLOCATE ALL, or DISCARD TEMP if a clusterwide effect is desired. diff --git a/src/test/regress/expected/hs_standby_disallowed.out b/src/test/regress/expected/hs_standby_disallowed.out index 8d3cafa5cec..0a62e40e743 100644 --- a/src/test/regress/expected/hs_standby_disallowed.out +++ b/src/test/regress/expected/hs_standby_disallowed.out @@ -11,9 +11,15 @@ commit; WARNING: there is no transaction in progress -- SELECT select * from hs1 FOR SHARE; -ERROR: cannot execute SELECT FOR SHARE in a read-only transaction +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: select * from hs1 FOR SHARE; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. 
select * from hs1 FOR UPDATE; -ERROR: cannot execute SELECT FOR UPDATE in a read-only transaction +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: select * from hs1 FOR UPDATE; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. -- DML BEGIN; insert into hs1 values (37); @@ -21,11 +27,17 @@ ERROR: cannot execute INSERT in a read-only transaction ROLLBACK; BEGIN; delete from hs1 where col1 = 1; -ERROR: cannot execute DELETE in a read-only transaction +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: delete from hs1 where col1 = 1; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. ROLLBACK; BEGIN; update hs1 set col1 = NULL where col1 > 0; -ERROR: cannot execute UPDATE in a read-only transaction +ERROR: cannot acquire lock mode ExclusiveLock on database objects while recovery is in progress +LINE 1: update hs1 set col1 = NULL where col1 > 0; + ^ +HINT: Only RowExclusiveLock or less can be acquired on database objects during recovery. ROLLBACK; BEGIN; truncate hs3; @@ -131,3 +143,15 @@ REVOKE SELECT ON hs1 FROM PUBLIC; ERROR: cannot execute REVOKE in a read-only transaction GRANT SELECT ON hs1 TO PUBLIC; ERROR: cannot execute GRANT in a read-only transaction +-- GPDB: backward fetch is not supported, moved from hs_standby_allowed. +BEGIN; +DECLARE hsc CURSOR FOR select * from hs3 order by col1 asc; +fetch next from hsc; + col1 +------ + 113 +(1 row) + +fetch first from hsc; +ERROR: backward scan is not supported in this version of Apache Cloudberry +COMMIT; diff --git a/src/test/regress/expected/hs_standby_functions.out b/src/test/regress/expected/hs_standby_functions.out index ce846b758bf..48cb480f47a 100644 --- a/src/test/regress/expected/hs_standby_functions.out +++ b/src/test/regress/expected/hs_standby_functions.out @@ -27,13 +27,16 @@ select * from pg_prepared_xacts; -------------+-----+----------+-------+---------- (0 rows) --- just the startup process -select locktype, virtualxid, virtualtransaction, mode, granted +-- just the startup processes of all standby coordinator and segments, since pg_locks show cluster-wide view +select gp_segment_id, locktype, virtualxid, virtualtransaction, mode, granted from pg_locks where virtualxid = '1/1'; - locktype | virtualxid | virtualtransaction | mode | granted -------------+------------+--------------------+---------------+--------- - virtualxid | 1/1 | 1/0 | ExclusiveLock | t -(1 row) + gp_segment_id | locktype | virtualxid | virtualtransaction | mode | granted +---------------+------------+------------+--------------------+---------------+--------- + -1 | virtualxid | 1/1 | 1/0 | ExclusiveLock | t + 0 | virtualxid | 1/1 | 1/0 | ExclusiveLock | t + 1 | virtualxid | 1/1 | 1/0 | ExclusiveLock | t + 2 | virtualxid | 1/1 | 1/0 | ExclusiveLock | t +(4 rows) -- suicide is painless select pg_cancel_backend(pg_backend_pid()); diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 3b9e91136d4..9320cf0aeec 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -3615,9 +3615,20 @@ cluster_healthy(void) return false; } + char *p; + /* skip if the instance is hot standby */ + psql_command_output("postgres", line, sizeof(line), + "SELECT pg_is_in_recovery();"); + p = &line[0]; + while (*p == ' ') + p++; + if (*p == 't') + { + return !halt_work; + } + i = 120; do { - char *p; /* check for the health for standby 
coordinator */ psql_command_output("postgres", line, sizeof(line), "SELECT sync_state FROM pg_stat_get_wal_senders();"); diff --git a/src/test/regress/sql/hs_primary_setup.sql b/src/test/regress/sql/hs_primary_setup.sql index eeb4421307f..83403299fd5 100644 --- a/src/test/regress/sql/hs_primary_setup.sql +++ b/src/test/regress/sql/hs_primary_setup.sql @@ -22,4 +22,11 @@ insert into hs3 values (115); DROP sequence if exists hsseq; create sequence hsseq; +-- start_ignore SELECT pg_switch_wal(); + +-- GPDB: enable hot_standby for this cluster +\! gpconfig -c hot_standby -v on; +\! gpstop -ari; + +-- end_ignore diff --git a/src/test/regress/sql/hs_standby_allowed.sql b/src/test/regress/sql/hs_standby_allowed.sql index 6debddc5e99..873f3ef8643 100644 --- a/src/test/regress/sql/hs_standby_allowed.sql +++ b/src/test/regress/sql/hs_standby_allowed.sql @@ -82,11 +82,12 @@ discard all; BEGIN; -DECLARE hsc CURSOR FOR select * from hs3; +DECLARE hsc CURSOR FOR select * from hs3 order by col1 asc; FETCH next from hsc; -fetch first from hsc; -fetch last from hsc; +-- GPDB: backward fetch isn't allowed, moved to hs_standby_disallowed +-- fetch first from hsc; +-- fetch last from hsc; fetch 1 from hsc; CLOSE hsc; diff --git a/src/test/regress/sql/hs_standby_disallowed.sql b/src/test/regress/sql/hs_standby_disallowed.sql index a470600eec8..72066e2d40b 100644 --- a/src/test/regress/sql/hs_standby_disallowed.sql +++ b/src/test/regress/sql/hs_standby_disallowed.sql @@ -101,3 +101,11 @@ REINDEX TABLE hs2; REVOKE SELECT ON hs1 FROM PUBLIC; GRANT SELECT ON hs1 TO PUBLIC; + +-- GPDB: backward fetch is not supported, moved from hs_standby_allowed. +BEGIN; +DECLARE hsc CURSOR FOR select * from hs3 order by col1 asc; +fetch next from hsc; +fetch first from hsc; +COMMIT; + diff --git a/src/test/regress/sql/hs_standby_functions.sql b/src/test/regress/sql/hs_standby_functions.sql index b57f67ff8b5..903c8f96037 100644 --- a/src/test/regress/sql/hs_standby_functions.sql +++ b/src/test/regress/sql/hs_standby_functions.sql @@ -16,8 +16,8 @@ select pg_stop_backup(); -- should return no rows select * from pg_prepared_xacts; --- just the startup process -select locktype, virtualxid, virtualtransaction, mode, granted +-- just the startup processes of all standby coordinator and segments, since pg_locks show cluster-wide view +select gp_segment_id, locktype, virtualxid, virtualtransaction, mode, granted from pg_locks where virtualxid = '1/1'; -- suicide is painless
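Closing note on the pg_locks expectation above: on a GPDB hot standby, pg_locks presents a cluster-wide view, so the startup (recovery) process of the standby coordinator and of every mirror segment appears, each holding its own virtualxid 1/1. A minimal query matching what the updated test checks:

-- one row per standby instance: the startup process's own virtual transaction lock
select gp_segment_id, locktype, virtualxid, virtualtransaction, mode, granted
from pg_locks
where virtualxid = '1/1';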