@@ -15,11 +15,13 @@ import (
1515 "go.temporal.io/api/workflowservice/v1"
1616 "go.temporal.io/sdk/activity"
1717 "go.temporal.io/sdk/temporal"
18+ "go.temporal.io/server/api/adminservice/v1"
1819 enumsspb "go.temporal.io/server/api/enums/v1"
1920 "go.temporal.io/server/api/historyservice/v1"
2021 replicationspb "go.temporal.io/server/api/replication/v1"
2122 serverClient "go.temporal.io/server/client"
2223 "go.temporal.io/server/common/definition"
24+ "go.temporal.io/server/common/dynamicconfig"
2325 "go.temporal.io/server/common/headers"
2426 "go.temporal.io/server/common/log"
2527 "go.temporal.io/server/common/log/tag"
@@ -100,18 +102,20 @@ type (
100102 }
101103
// activities is the receiver for the activity methods in this file (for example checkHandoverOnce,
// generateWorkflowReplicationTask and GenerateReplicationTasks); its fields hold the dependencies those
// methods read. In this diff view the "-" lines are the previous field list and the "+" lines are the new
// one, which adds adminClient and generateMigrationTaskViaFrontend.
102104 activities struct {
103- historyShardCount int32
104- executionManager persistence.ExecutionManager
105- taskManager persistence.TaskManager
106- namespaceRegistry namespace.Registry
107- historyClient historyservice.HistoryServiceClient
108- frontendClient workflowservice.WorkflowServiceClient
109- clientFactory serverClient.Factory
110- clientBean serverClient.Bean
111- logger log.Logger
112- metricsHandler metrics.Handler
113- forceReplicationMetricsHandler metrics.Handler
114- namespaceReplicationQueue persistence.NamespaceReplicationQueue
105+ historyShardCount int32
106+ executionManager persistence.ExecutionManager
107+ taskManager persistence.TaskManager
108+ namespaceRegistry namespace.Registry
109+ historyClient historyservice.HistoryServiceClient
110+ frontendClient workflowservice.WorkflowServiceClient
111+ adminClient adminservice.AdminServiceClient // called by generateWorkflowReplicationTask when generateViaFrontend is true
112+ clientFactory serverClient.Factory
113+ clientBean serverClient.Bean
114+ logger log.Logger
115+ metricsHandler metrics.Handler
116+ forceReplicationMetricsHandler metrics.Handler
117+ namespaceReplicationQueue persistence.NamespaceReplicationQueue
118+ generateMigrationTaskViaFrontend dynamicconfig.BoolPropertyFn // evaluated in GenerateReplicationTasks before its loop; true selects adminClient over historyClient
115119 }
116120)
117121
@@ -336,7 +340,15 @@ func (a *activities) checkHandoverOnce(ctx context.Context, waitRequest waitHand
336340 return readyShardCount == len (resp .Shards ), nil
337341}
338342
339- func (a * activities ) generateWorkflowReplicationTask (ctx context.Context , rateLimiter quotas.RateLimiter , wKey definition.WorkflowKey , targetClusters []string ) error {
343+ func (a * activities ) generateWorkflowReplicationTask (
344+ ctx context.Context ,
345+ rateLimiter quotas.RateLimiter ,
346+ namespaceName string ,
347+ namespaceID string ,
348+ we * commonpb.WorkflowExecution ,
349+ targetClusters []string ,
350+ generateViaFrontend bool ,
351+ ) error {
340352 if err := rateLimiter .WaitN (ctx , 1 ); err != nil {
341353 return err
342354 }
@@ -345,24 +357,36 @@ func (a *activities) generateWorkflowReplicationTask(ctx context.Context, rateLi
345357 ctx , cancel := context .WithTimeout (ctx , time .Second * 10 )
346358 defer cancel ()
347359
348- resp , err := a .historyClient .GenerateLastHistoryReplicationTasks (ctx , & historyservice.GenerateLastHistoryReplicationTasksRequest {
349- NamespaceId : wKey .NamespaceID ,
350- Execution : & commonpb.WorkflowExecution {
351- WorkflowId : wKey .WorkflowID ,
352- RunId : wKey .RunID ,
353- },
354- TargetClusters : targetClusters ,
355- })
356-
357- if err != nil {
358- return err
360+ var stateTransitionCount , historyLength int64
361+ if generateViaFrontend {
362+ resp , err := a .adminClient .GenerateLastHistoryReplicationTasks (ctx , & adminservice.GenerateLastHistoryReplicationTasksRequest {
363+ Namespace : namespaceName ,
364+ Execution : we ,
365+ TargetClusters : targetClusters ,
366+ })
367+ if err != nil {
368+ return err
369+ }
370+ stateTransitionCount = resp .StateTransitionCount
371+ historyLength = resp .HistoryLength
372+ } else {
373+ resp , err := a .historyClient .GenerateLastHistoryReplicationTasks (ctx , & historyservice.GenerateLastHistoryReplicationTasksRequest {
374+ NamespaceId : namespaceID ,
375+ Execution : we ,
376+ TargetClusters : targetClusters ,
377+ })
378+ if err != nil {
379+ return err
380+ }
381+ stateTransitionCount = resp .StateTransitionCount
382+ historyLength = resp .HistoryLength
359383 }
360384
361385 // If a workflow has many activity retries (e.g., due to a bug in activity code), the state transition count can be
362386 // large while the number of actual state transitions applied on the target cluster can be very small.
363387 // Take the minimum of StateTransitionCount and HistoryLength as a heuristic to avoid unnecessary throttling
364388 // in such situations.
365- count := min (resp . StateTransitionCount , resp . HistoryLength )
389+ count := min (stateTransitionCount , historyLength )
366390 for count > 0 {
367391 token := min (int (count ), rateLimiter .Burst ())
368392 count -= int64 (token )
@@ -475,27 +499,38 @@ func (a *activities) GenerateReplicationTasks(ctx context.Context, request *gene
475499 }
476500 }
477501
502+ namespaceName , err := a .namespaceRegistry .GetNamespaceName (namespace .ID (request .NamespaceID ))
503+ if err != nil {
504+ a .logger .Error ("force-replication failed to translate namespaceID to name" , tag .WorkflowNamespaceID (request .NamespaceID ))
505+ return err
506+ }
507+
508+ generateViaFrontend := a .generateMigrationTaskViaFrontend ()
478509 for i := startIndex ; i < len (request .Executions ); i ++ {
479- var executionCandidates []definition.WorkflowKey
480- executionCandidates = []definition.WorkflowKey {definition .NewWorkflowKey (request .NamespaceID , request .Executions [i ].GetWorkflowId (), request .Executions [i ].GetRunId ())}
481-
482- for _ , we := range executionCandidates {
483- if err := a .generateWorkflowReplicationTask (ctx , rateLimiter , we , request .TargetClusters ); err != nil {
484- if ! isNotFoundServiceError (err ) {
485- a .logger .Error ("force-replication failed to generate replication task" ,
486- tag .WorkflowNamespaceID (we .GetNamespaceID ()),
487- tag .WorkflowID (we .GetWorkflowID ()),
488- tag .WorkflowRunID (we .GetRunID ()),
489- tag .Error (err ))
490- return err
491- }
492-
493- a .logger .Warn ("force-replication ignore replication task due to NotFoundServiceError" ,
494- tag .WorkflowNamespaceID (we .GetNamespaceID ()),
495- tag .WorkflowID (we .GetWorkflowID ()),
496- tag .WorkflowRunID (we .GetRunID ()),
510+ we := request .Executions [i ]
511+ if err := a .generateWorkflowReplicationTask (
512+ ctx ,
513+ rateLimiter ,
514+ namespaceName .String (),
515+ request .NamespaceID ,
516+ we ,
517+ request .TargetClusters ,
518+ generateViaFrontend ,
519+ ); err != nil {
520+ if ! isNotFoundServiceError (err ) {
521+ a .logger .Error ("force-replication failed to generate replication task" ,
522+ tag .WorkflowNamespaceID (request .NamespaceID ),
523+ tag .WorkflowID (we .GetWorkflowId ()),
524+ tag .WorkflowRunID (we .GetRunId ()),
497525 tag .Error (err ))
526+ return err
498527 }
528+
529+ a .logger .Warn ("force-replication ignore replication task due to NotFoundServiceError" ,
530+ tag .WorkflowNamespaceID (request .NamespaceID ),
531+ tag .WorkflowID (we .GetWorkflowId ()),
532+ tag .WorkflowRunID (we .GetRunId ()),
533+ tag .Error (err ))
499534 }
500535 activity .RecordHeartbeat (ctx , i )
501536 }
0 commit comments