11
11
12
12
import org .apache .lucene .util .ArrayUtil ;
13
13
import org .apache .lucene .util .automaton .TooComplexToDeterminizeException ;
14
+ import org .elasticsearch .ElasticsearchTimeoutException ;
14
15
import org .elasticsearch .ExceptionsHelper ;
15
16
import org .elasticsearch .action .ActionListener ;
17
+ import org .elasticsearch .action .ActionListenerResponseHandler ;
16
18
import org .elasticsearch .action .ActionRunnable ;
17
19
import org .elasticsearch .action .ActionType ;
18
20
import org .elasticsearch .action .OriginalIndices ;
22
24
import org .elasticsearch .action .support .ChannelActionListener ;
23
25
import org .elasticsearch .action .support .HandledTransportAction ;
24
26
import org .elasticsearch .action .support .RefCountingRunnable ;
25
- import org .elasticsearch .client . internal . RemoteClusterClient ;
27
+ import org .elasticsearch .action . support . SubscribableListener ;
26
28
import org .elasticsearch .cluster .ProjectState ;
27
29
import org .elasticsearch .cluster .block .ClusterBlockLevel ;
28
30
import org .elasticsearch .cluster .metadata .IndexNameExpressionResolver ;
37
39
import org .elasticsearch .common .util .concurrent .ThrottledTaskRunner ;
38
40
import org .elasticsearch .core .Nullable ;
39
41
import org .elasticsearch .core .Releasable ;
42
+ import org .elasticsearch .core .TimeValue ;
40
43
import org .elasticsearch .core .Tuple ;
41
44
import org .elasticsearch .index .shard .ShardId ;
42
45
import org .elasticsearch .indices .IndicesService ;
48
51
import org .elasticsearch .tasks .Task ;
49
52
import org .elasticsearch .threadpool .ThreadPool ;
50
53
import org .elasticsearch .transport .RemoteClusterAware ;
51
- import org .elasticsearch .transport .RemoteClusterService ;
54
+ import org .elasticsearch .transport .Transport ;
52
55
import org .elasticsearch .transport .TransportChannel ;
53
56
import org .elasticsearch .transport .TransportRequestHandler ;
57
+ import org .elasticsearch .transport .TransportRequestOptions ;
54
58
import org .elasticsearch .transport .TransportService ;
55
59
56
60
import java .util .ArrayList ;
@@ -91,6 +95,8 @@ public class TransportFieldCapabilitiesAction extends HandledTransportAction<Fie
91
95
92
96
private final IndicesService indicesService ;
93
97
private final boolean ccsCheckCompatibility ;
98
+ private final ThreadPool threadPool ;
99
+ private final TimeValue forceConnectTimeoutSecs ;
94
100
95
101
@ Inject
96
102
public TransportFieldCapabilitiesAction (
@@ -117,32 +123,40 @@ public TransportFieldCapabilitiesAction(
117
123
new NodeTransportHandler ()
118
124
);
119
125
this .ccsCheckCompatibility = SearchService .CCS_VERSION_CHECK_SETTING .get (clusterService .getSettings ());
126
+ this .threadPool = threadPool ;
127
+ this .forceConnectTimeoutSecs = clusterService .getSettings ().getAsTime ("search.ccs.force_connect_timeout" , null );
120
128
}
121
129
122
130
/**
 * Entry point for the action: forwards {@code task}, {@code request} and {@code listener} to
 * {@code executeRequest}, together with the executor used to send a request to a linked cluster.
 *
 * In the added ({@code +}) variant that executor is a lambda which sends the request over the supplied
 * connection via {@code transportService.sendRequest}, using {@code REMOTE_TYPE.name()} as the action name,
 * {@code TransportRequestOptions.EMPTY} as the options and the supplied response handler. The removed
 * ({@code -}) variant called {@code remoteClient.execute(REMOTE_TYPE, ...)} instead.
 *
 * @param task     the task passed through to {@code executeRequest}
 * @param request  the field-capabilities request passed through to {@code executeRequest}
 * @param listener the listener passed through to {@code executeRequest}
 */
@ Override
123
131
protected void doExecute (Task task , FieldCapabilitiesRequest request , final ActionListener <FieldCapabilitiesResponse > listener ) {
124
132
executeRequest (
125
133
task ,
126
134
request ,
127
- (remoteClient , remoteRequest , remoteListener ) -> remoteClient .execute (REMOTE_TYPE , remoteRequest , remoteListener ),
135
// Executor for linked-cluster requests: sends directly over the connection it is handed.
+ (transportService , conn , fieldCapabilitiesRequest , responseHandler ) -> transportService .sendRequest (
136
+ conn ,
137
+ REMOTE_TYPE .name (),
138
+ fieldCapabilitiesRequest ,
139
+ TransportRequestOptions .EMPTY ,
140
+ responseHandler
141
+ ),
128
142
listener
129
143
);
130
144
}
131
145
132
146
/**
 * Public entry point that lets the caller choose how requests to linked clusters are sent.
 * The work itself is done by {@code doExecuteForked}, which this method submits to
 * {@code searchCoordinationExecutor} wrapped via {@code ActionRunnable.wrap(listener, ...)}.
 *
 * @param task     the task handed on to {@code doExecuteForked}
 * @param request  the field-capabilities request handed on to {@code doExecuteForked}
 * @param linkedRequestExecutor the executor handed on to {@code doExecuteForked}
 *                 (named {@code remoteRequestExecutor} in the removed variant)
 * @param listener the listener the forked work is wrapped with
 */
public void executeRequest (
133
147
Task task ,
134
148
FieldCapabilitiesRequest request ,
135
- RemoteRequestExecutor remoteRequestExecutor ,
149
+ LinkedRequestExecutor linkedRequestExecutor ,
136
150
ActionListener <FieldCapabilitiesResponse > listener
137
151
) {
138
152
// workaround for https://github.com/elastic/elasticsearch/issues/97916 - TODO remove this when we can
139
- searchCoordinationExecutor .execute (ActionRunnable .wrap (listener , l -> doExecuteForked (task , request , remoteRequestExecutor , l )));
153
// Run doExecuteForked on the search coordination executor rather than on the calling thread.
+ searchCoordinationExecutor .execute (ActionRunnable .wrap (listener , l -> doExecuteForked (task , request , linkedRequestExecutor , l )));
140
154
}
141
155
142
156
private void doExecuteForked (
143
157
Task task ,
144
158
FieldCapabilitiesRequest request ,
145
- RemoteRequestExecutor remoteRequestExecutor ,
159
+ LinkedRequestExecutor linkedRequestExecutor ,
146
160
ActionListener <FieldCapabilitiesResponse > listener
147
161
) {
148
162
if (ccsCheckCompatibility ) {
@@ -268,12 +282,6 @@ private void doExecuteForked(
268
282
for (Map .Entry <String , OriginalIndices > remoteIndices : remoteClusterIndices .entrySet ()) {
269
283
String clusterAlias = remoteIndices .getKey ();
270
284
OriginalIndices originalIndices = remoteIndices .getValue ();
271
- var remoteClusterClient = transportService .getRemoteClusterService ()
272
- .getRemoteClusterClient (
273
- clusterAlias ,
274
- singleThreadedExecutor ,
275
- RemoteClusterService .DisconnectedStrategy .RECONNECT_UNLESS_SKIP_UNAVAILABLE
276
- );
277
285
FieldCapabilitiesRequest remoteRequest = prepareRemoteRequest (clusterAlias , request , originalIndices , nowInMillis );
278
286
ActionListener <FieldCapabilitiesResponse > remoteListener = ActionListener .wrap (response -> {
279
287
for (FieldCapabilitiesIndexResponse resp : response .getIndexResponses ()) {
@@ -299,18 +307,34 @@ private void doExecuteForked(
299
307
handleIndexFailure .accept (RemoteClusterAware .buildRemoteIndexName (clusterAlias , index ), ex );
300
308
}
301
309
});
302
- remoteRequestExecutor .executeRemoteRequest (
303
- remoteClusterClient ,
304
- remoteRequest ,
310
+
311
+ SubscribableListener <Transport .Connection > connectionListener = new SubscribableListener <>();
312
+ if (forceConnectTimeoutSecs != null ) {
313
+ connectionListener .addTimeout (forceConnectTimeoutSecs , threadPool , singleThreadedExecutor );
314
+ }
315
+
316
+ connectionListener .addListener (
305
317
// The underlying transport service may call onFailure with a thread pool other than search_coordinator.
306
318
// This fork is a workaround to ensure that the merging of field-caps always occurs on the search_coordinator.
307
319
// TODO: remove this workaround once https://github.com/elastic/elasticsearch/issues/107439 is fixed
308
320
new ForkingOnFailureActionListener <>(
309
321
singleThreadedExecutor ,
310
322
true ,
311
323
ActionListener .releaseAfter (remoteListener , refs .acquire ())
324
+ ).delegateFailure (
325
+ (responseListener , conn ) -> linkedRequestExecutor .executeRemoteRequest (
326
+ transportService ,
327
+ conn ,
328
+ remoteRequest ,
329
+ new ActionListenerResponseHandler <>(responseListener , FieldCapabilitiesResponse ::new , singleThreadedExecutor )
330
+ )
312
331
)
313
332
);
333
+
334
+ boolean ensureConnected = forceConnectTimeoutSecs != null
335
+ || transportService .getRemoteClusterService ().isSkipUnavailable (clusterAlias ) == false ;
336
+ transportService .getRemoteClusterService ()
337
+ .maybeEnsureConnectedAndGetConnection (clusterAlias , ensureConnected , connectionListener );
314
338
}
315
339
}
316
340
}
@@ -338,11 +362,12 @@ public void onFailure(Exception e) {
338
362
});
339
363
}
340
364
341
/**
 * Callback that sends a single field-capabilities request to a linked cluster.
 *
 * This change renames the interface from {@code RemoteRequestExecutor} to {@code LinkedRequestExecutor}
 * and changes the shape of its only method: instead of a {@code RemoteClusterClient} and an
 * {@code ActionListener}, implementations now receive the {@code TransportService}, an already obtained
 * {@code Transport.Connection}, the request, and an {@code ActionListenerResponseHandler} for the response.
 * {@code doExecute} supplies an implementation that calls {@code transportService.sendRequest}.
 */
- public interface RemoteRequestExecutor {
365
+ public interface LinkedRequestExecutor {
342
366
void executeRemoteRequest (
343
- RemoteClusterClient remoteClient ,
367
+ TransportService transportService ,
368
+ Transport .Connection conn ,
344
369
FieldCapabilitiesRequest remoteRequest ,
345
- ActionListener <FieldCapabilitiesResponse > remoteListener
370
+ ActionListenerResponseHandler <FieldCapabilitiesResponse > responseHandler
346
371
);
347
372
}
348
373
@@ -376,8 +401,20 @@ private static void mergeIndexResponses(
376
401
} else {
377
402
// we have no responses at all, maybe because of errors
378
403
if (indexFailures .isEmpty () == false ) {
379
- // throw back the first exception
380
- listener .onFailure (failures .get (0 ).getException ());
404
+ /*
405
+ * Under no circumstances are we to pass timeout errors originating from SubscribableListener as top-level errors.
406
+ * Instead, they should always be passed through the response object, as part of "failures".
407
+ */
408
+ if (failures .stream ()
409
+ .anyMatch (
410
+ failure -> failure .getException () instanceof IllegalStateException ise
411
+ && ise .getCause () instanceof ElasticsearchTimeoutException
412
+ )) {
413
+ listener .onResponse (new FieldCapabilitiesResponse (Collections .emptyList (), failures ));
414
+ } else {
415
+ // throw back the first exception
416
+ listener .onFailure (failures .get (0 ).getException ());
417
+ }
381
418
} else {
382
419
listener .onResponse (new FieldCapabilitiesResponse (Collections .emptyList (), Collections .emptyList ()));
383
420
}
@@ -585,15 +622,24 @@ List<FieldCapabilitiesFailure> build(Set<String> successfulIndices) {
585
622
for (Map .Entry <String , Exception > failure : failuresByIndex .entrySet ()) {
586
623
String index = failure .getKey ();
587
624
Exception e = failure .getValue ();
625
+ /*
626
+ * The listener we use to briefly try to connect to a linked cluster can fail with an ElasticsearchTimeoutException
627
+ * if that cluster cannot be reached. To make sure we correctly recognise this scenario via
628
+ * ExceptionsHelper.isRemoteUnavailableException(), we wrap this error appropriately.
629
+ */
630
+ if (e instanceof ElasticsearchTimeoutException ete ) {
631
+ e = new IllegalStateException ("Unable to open any connections" , ete );
632
+ }
588
633
589
634
if (successfulIndices .contains (index ) == false ) {
590
635
// we deduplicate exceptions on the underlying causes message and classname
591
636
// we unwrap the cause to e.g. group RemoteTransportExceptions coming from different nodes if the cause is the same
592
637
Throwable cause = ExceptionsHelper .unwrapCause (e );
593
638
Tuple <String , String > groupingKey = new Tuple <>(cause .getMessage (), cause .getClass ().getName ());
639
+ Exception ex = e ;
594
640
indexFailures .compute (
595
641
groupingKey ,
596
- (k , v ) -> v == null ? new FieldCapabilitiesFailure (new String [] { index }, e ) : v .addIndex (index )
642
+ (k , v ) -> v == null ? new FieldCapabilitiesFailure (new String [] { index }, ex ) : v .addIndex (index )
597
643
);
598
644
}
599
645
}
0 commit comments