18
18
package org .apache .flink .connector .kinesis .sink ;
19
19
20
20
import org .apache .flink .annotation .Internal ;
21
+ import org .apache .flink .annotation .VisibleForTesting ;
21
22
import org .apache .flink .api .connector .sink2 .Sink ;
22
23
import org .apache .flink .connector .aws .util .AWSClientUtil ;
23
24
import org .apache .flink .connector .aws .util .AWSGeneralUtil ;
32
33
import org .apache .flink .connector .base .sink .writer .strategy .RateLimitingStrategy ;
33
34
import org .apache .flink .metrics .Counter ;
34
35
import org .apache .flink .metrics .groups .SinkWriterMetricGroup ;
36
+ import org .apache .flink .util .Preconditions ;
35
37
36
38
import org .slf4j .Logger ;
37
39
import org .slf4j .LoggerFactory ;
43
45
import software .amazon .awssdk .services .kinesis .model .PutRecordsResultEntry ;
44
46
import software .amazon .awssdk .services .kinesis .model .ResourceNotFoundException ;
45
47
48
+ import java .io .IOException ;
46
49
import java .util .ArrayList ;
47
50
import java .util .Collection ;
48
51
import java .util .Collections ;
52
+ import java .util .HashMap ;
49
53
import java .util .List ;
54
+ import java .util .Map ;
50
55
import java .util .Properties ;
51
56
import java .util .concurrent .CompletableFuture ;
52
57
@@ -96,11 +101,8 @@ class KinesisStreamsSinkWriter<InputT> extends AsyncSinkWriter<InputT, PutRecord
96
101
/* The sink writer metric group */
97
102
private final SinkWriterMetricGroup metrics ;
98
103
99
- /* The asynchronous http client for the asynchronous Kinesis client */
100
- private final SdkAsyncHttpClient httpClient ;
101
-
102
- /* The asynchronous Kinesis client - construction is by kinesisClientProperties */
103
- private final KinesisAsyncClient kinesisClient ;
104
+ /* The client provider */
105
+ private KinesisClientProvider kinesisClientProvider ;
104
106
105
107
/* Flag to whether fatally fail any time we encounter an exception when persisting records */
106
108
private final boolean failOnError ;
@@ -148,6 +150,36 @@ class KinesisStreamsSinkWriter<InputT> extends AsyncSinkWriter<InputT, PutRecord
148
150
String streamArn ,
149
151
Properties kinesisClientProperties ,
150
152
Collection <BufferedRequestState <PutRecordsRequestEntry >> states ) {
153
+ this (
154
+ elementConverter ,
155
+ context ,
156
+ maxBatchSize ,
157
+ maxInFlightRequests ,
158
+ maxBufferedRequests ,
159
+ maxBatchSizeInBytes ,
160
+ maxTimeInBufferMS ,
161
+ maxRecordSizeInBytes ,
162
+ failOnError ,
163
+ streamName ,
164
+ streamArn ,
165
+ states ,
166
+ createDefaultClientProvider (kinesisClientProperties ));
167
+ }
168
+
169
+ KinesisStreamsSinkWriter (
170
+ ElementConverter <InputT , PutRecordsRequestEntry > elementConverter ,
171
+ Sink .InitContext context ,
172
+ int maxBatchSize ,
173
+ int maxInFlightRequests ,
174
+ int maxBufferedRequests ,
175
+ long maxBatchSizeInBytes ,
176
+ long maxTimeInBufferMS ,
177
+ long maxRecordSizeInBytes ,
178
+ boolean failOnError ,
179
+ String streamName ,
180
+ String streamArn ,
181
+ Collection <BufferedRequestState <PutRecordsRequestEntry >> states ,
182
+ KinesisClientProvider kinesisClientProvider ) {
151
183
super (
152
184
elementConverter ,
153
185
context ,
@@ -167,11 +199,48 @@ class KinesisStreamsSinkWriter<InputT> extends AsyncSinkWriter<InputT, PutRecord
167
199
this .streamArn = streamArn ;
168
200
this .metrics = context .metricGroup ();
169
201
this .numRecordsOutErrorsCounter = metrics .getNumRecordsOutErrorsCounter ();
170
- this .httpClient = AWSGeneralUtil .createAsyncHttpClient (kinesisClientProperties );
171
- this .kinesisClient = buildClient (kinesisClientProperties , this .httpClient );
202
+ setKinesisClientProvider (kinesisClientProvider );
203
+ }
204
+
205
+ /**
206
+ * Create a default KinesisClientProvider to manage the Kinesis client and HTTP client.
207
+ *
208
+ * @param kinesisClientProperties Properties for configuring the Kinesis client
209
+ * @return A KinesisClientProvider implementation
210
+ */
211
+ private static KinesisClientProvider createDefaultClientProvider (Properties kinesisClientProperties ) {
212
+ return new KinesisClientProvider () {
213
+ private final SdkAsyncHttpClient httpClient =
214
+ AWSGeneralUtil .createAsyncHttpClient (kinesisClientProperties );
215
+ private final KinesisAsyncClient kinesisClient =
216
+ buildClient (kinesisClientProperties , httpClient );
217
+
218
+ @ Override
219
+ public KinesisAsyncClient get () {
220
+ return kinesisClient ;
221
+ }
222
+
223
+ @ Override
224
+ public void close () {
225
+ AWSGeneralUtil .closeResources (httpClient , kinesisClient );
226
+ }
227
+ };
172
228
}
173
229
174
- private KinesisAsyncClient buildClient (
230
+ /**
231
+ * Set a custom KinesisAsyncClient provider for testing purposes. This method is only intended
232
+ * to be used in tests.
233
+ *
234
+ * @param kinesisClientProvider The provider that supplies the KinesisAsyncClient
235
+ */
236
+ @ VisibleForTesting
237
+ void setKinesisClientProvider (KinesisClientProvider kinesisClientProvider ) {
238
+ this .kinesisClientProvider =
239
+ Preconditions .checkNotNull (
240
+ kinesisClientProvider , "The kinesisClientProvider must not be null." );
241
+ }
242
+
243
+ private static KinesisAsyncClient buildClient (
175
244
Properties kinesisClientProperties , SdkAsyncHttpClient httpClient ) {
176
245
AWSGeneralUtil .validateAwsCredentials (kinesisClientProperties );
177
246
@@ -208,6 +277,7 @@ protected void submitRequestEntries(
208
277
.streamARN (streamArn )
209
278
.build ();
210
279
280
+ KinesisAsyncClient kinesisClient = kinesisClientProvider .get ();
211
281
CompletableFuture <PutRecordsResponse > future = kinesisClient .putRecords (batchRequest );
212
282
213
283
future .whenComplete (
@@ -232,7 +302,7 @@ private void handleFullyFailedRequest(
232
302
List <PutRecordsRequestEntry > requestEntries ,
233
303
ResultHandler <PutRecordsRequestEntry > resultHandler ) {
234
304
LOG .warn (
235
- "KDS Sink failed to write and will retry {} entries to KDS" ,
305
+ "Kinesis Data Stream Sink failed to write and will retry {} entries to KDS" ,
236
306
requestEntries .size (),
237
307
err );
238
308
numRecordsOutErrorsCounter .inc (requestEntries .size ());
@@ -244,34 +314,117 @@ private void handleFullyFailedRequest(
244
314
245
315
@ Override
246
316
public void close () {
247
- AWSGeneralUtil .closeResources (httpClient , kinesisClient );
317
+ try {
318
+ kinesisClientProvider .close ();
319
+ } catch (IOException e ) {
320
+ throw new KinesisStreamsException ("Failed to close the kinesisClientProvider" , e );
321
+ }
248
322
}
249
323
250
324
private void handlePartiallyFailedRequest (
251
325
PutRecordsResponse response ,
252
326
List <PutRecordsRequestEntry > requestEntries ,
253
327
ResultHandler <PutRecordsRequestEntry > resultHandler ) {
254
- LOG .warn (
255
- "KDS Sink failed to write and will retry {} entries to KDS" ,
256
- response .failedRecordCount ());
257
- numRecordsOutErrorsCounter .inc (response .failedRecordCount ());
328
+ int failedRecordCount = response .failedRecordCount ();
329
+ LOG .warn ("Kinesis Data Stream Sink failed to write and will retry {} entries to KDS" , failedRecordCount );
330
+ numRecordsOutErrorsCounter .inc (failedRecordCount );
258
331
259
332
if (failOnError ) {
260
333
resultHandler .completeExceptionally (
261
334
new KinesisStreamsException .KinesisStreamsFailFastException ());
262
335
return ;
263
336
}
264
- List < PutRecordsRequestEntry > failedRequestEntries =
265
- new ArrayList <>(response . failedRecordCount () );
337
+
338
+ List < PutRecordsRequestEntry > failedRequestEntries = new ArrayList <>(failedRecordCount );
266
339
List <PutRecordsResultEntry > records = response .records ();
267
340
341
+ // Collect error information and build the list of failed entries
342
+ Map <String , ErrorSummary > errorSummaries =
343
+ collectErrorSummaries (records , requestEntries , failedRequestEntries );
344
+
345
+ // Log aggregated error information
346
+ logErrorSummaries (errorSummaries );
347
+
348
+ // Return the failed entries for retry
349
+ resultHandler .retryForEntries (failedRequestEntries );
350
+ }
351
+
352
+ /**
353
+ * Collect error summaries from failed records and build a list of failed request entries.
354
+ *
355
+ * @param records The result entries from the Kinesis response
356
+ * @param requestEntries The original request entries
357
+ * @param failedRequestEntries List to populate with failed entries (modified as a side effect)
358
+ * @return A map of error codes to their summaries
359
+ */
360
+ private Map <String , ErrorSummary > collectErrorSummaries (
361
+ List <PutRecordsResultEntry > records ,
362
+ List <PutRecordsRequestEntry > requestEntries ,
363
+ List <PutRecordsRequestEntry > failedRequestEntries ) {
364
+
365
+ // We capture error info while minimizing logging overhead in the data path,
366
+ // which is critical for maintaining throughput performance
367
+ Map <String , ErrorSummary > errorSummaries = new HashMap <>();
368
+
268
369
for (int i = 0 ; i < records .size (); i ++) {
269
- if (records .get (i ).errorCode () != null ) {
370
+ PutRecordsResultEntry resultEntry = records .get (i );
371
+ String errorCode = resultEntry .errorCode ();
372
+
373
+ if (errorCode != null ) {
374
+ // Track the frequency of each error code to identify patterns
375
+ ErrorSummary summary =
376
+ errorSummaries .computeIfAbsent (
377
+ errorCode , code -> new ErrorSummary (resultEntry .errorMessage ()));
378
+ summary .incrementCount ();
379
+
270
380
failedRequestEntries .add (requestEntries .get (i ));
271
381
}
272
382
}
273
383
274
- resultHandler .retryForEntries (failedRequestEntries );
384
+ return errorSummaries ;
385
+ }
386
+
387
+ /**
388
+ * Log aggregated error information at WARN level.
389
+ *
390
+ * @param errorSummaries Map of error codes to their summaries
391
+ */
392
+ private void logErrorSummaries (Map <String , ErrorSummary > errorSummaries ) {
393
+ // We log aggregated error information at WARN level to ensure visibility in production
394
+ // while avoiding the performance impact of logging each individual failure
395
+ if (!errorSummaries .isEmpty ()) {
396
+ // Using a single WARN log with aggregated information provides operational
397
+ // visibility into errors without flooding logs in high-throughput scenarios
398
+ LOG .warn ("Kinesis Data Stream Sink failed to write, Errors summary: " + errorSummaries );
399
+ }
400
+ }
401
+
402
+ /** Helper class to store error summary information. */
403
+ static class ErrorSummary {
404
+ private final String exampleMessage ;
405
+ private int count ;
406
+
407
+ ErrorSummary (String exampleMessage ) {
408
+ this .exampleMessage = exampleMessage ;
409
+ this .count = 0 ;
410
+ }
411
+
412
+ void incrementCount () {
413
+ count ++;
414
+ }
415
+
416
+ int getCount () {
417
+ return count ;
418
+ }
419
+
420
+ String getExampleMessage () {
421
+ return exampleMessage ;
422
+ }
423
+
424
+ @ Override
425
+ public String toString () {
426
+ return String .format ("[%d records, example: %s]" , count , exampleMessage );
427
+ }
275
428
}
276
429
277
430
private boolean isRetryable (
0 commit comments