
Commit ca36dd8

use freshness as max tiering duration and mark forced finished table as pending again
1 parent 38222c2 commit ca36dd8

File tree

12 files changed: +260 -102 lines

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/tiering/LakeTieringJobBuilder.java

Lines changed: 7 additions & 6 deletions
@@ -17,12 +17,6 @@
 
 package org.apache.fluss.flink.tiering;
 
-import org.apache.flink.api.common.eventtime.WatermarkStrategy;
-import org.apache.flink.configuration.PipelineOptions;
-import org.apache.flink.core.execution.JobClient;
-import org.apache.flink.streaming.api.datastream.DataStreamSource;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.streaming.api.functions.sink.v2.DiscardingSink;
 import org.apache.fluss.config.Configuration;
 import org.apache.fluss.flink.tiering.committer.CommittableMessageTypeInfo;
 import org.apache.fluss.flink.tiering.committer.TieringCommitOperatorFactory;
@@ -33,6 +27,13 @@
 import org.apache.fluss.lake.lakestorage.LakeStoragePluginSetUp;
 import org.apache.fluss.lake.writer.LakeTieringFactory;
 
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.configuration.PipelineOptions;
+import org.apache.flink.core.execution.JobClient;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.sink.v2.DiscardingSink;
+
 import static org.apache.fluss.flink.tiering.source.TieringSource.TIERING_SOURCE_TRANSFORMATION_UID;
 import static org.apache.fluss.flink.tiering.source.TieringSourceOptions.POLL_TIERING_TABLE_INTERVAL;
 import static org.apache.fluss.utils.Preconditions.checkNotNull;

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/tiering/source/TieringSource.java

Lines changed: 12 additions & 11 deletions
@@ -17,17 +17,6 @@
 
 package org.apache.fluss.flink.tiering.source;
 
-import org.apache.flink.api.connector.source.Boundedness;
-import org.apache.flink.api.connector.source.Source;
-import org.apache.flink.api.connector.source.SourceReader;
-import org.apache.flink.api.connector.source.SourceReaderContext;
-import org.apache.flink.api.connector.source.SplitEnumerator;
-import org.apache.flink.api.connector.source.SplitEnumeratorContext;
-import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
-import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
-import org.apache.flink.core.io.SimpleVersionedSerializer;
-import org.apache.flink.runtime.jobgraph.OperatorID;
-import org.apache.flink.streaming.api.graph.StreamGraphHasherV2;
 import org.apache.fluss.client.Connection;
 import org.apache.fluss.client.ConnectionFactory;
 import org.apache.fluss.config.Configuration;
@@ -41,6 +30,18 @@
 import org.apache.fluss.shaded.guava32.com.google.common.hash.Hasher;
 import org.apache.fluss.shaded.guava32.com.google.common.hash.Hashing;
 
+import org.apache.flink.api.connector.source.Boundedness;
+import org.apache.flink.api.connector.source.Source;
+import org.apache.flink.api.connector.source.SourceReader;
+import org.apache.flink.api.connector.source.SourceReaderContext;
+import org.apache.flink.api.connector.source.SplitEnumerator;
+import org.apache.flink.api.connector.source.SplitEnumeratorContext;
+import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
+import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.runtime.jobgraph.OperatorID;
+import org.apache.flink.streaming.api.graph.StreamGraphHasherV2;
+
 import java.nio.charset.StandardCharsets;
 
 import static org.apache.fluss.flink.tiering.source.TieringSourceOptions.POLL_TIERING_TABLE_INTERVAL;

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/tiering/source/TieringSourceFetcherManager.java

Lines changed: 3 additions & 2 deletions
@@ -18,14 +18,15 @@
 
 package org.apache.fluss.flink.tiering.source;
 
+import org.apache.fluss.flink.adapter.SingleThreadFetcherManagerAdapter;
+import org.apache.fluss.flink.tiering.source.split.TieringSplit;
+
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
 import org.apache.flink.connector.base.source.reader.fetcher.SplitFetcher;
 import org.apache.flink.connector.base.source.reader.fetcher.SplitFetcherTask;
 import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
 import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
-import org.apache.fluss.flink.adapter.SingleThreadFetcherManagerAdapter;
-import org.apache.fluss.flink.tiering.source.split.TieringSplit;
 
 import java.util.Collection;
 import java.util.function.Consumer;

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/tiering/source/TieringSplitReader.java

Lines changed: 6 additions & 4 deletions
@@ -17,10 +17,6 @@
 
 package org.apache.fluss.flink.tiering.source;
 
-import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
-import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
-import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition;
-import org.apache.flink.connector.base.source.reader.splitreader.SplitsChange;
 import org.apache.fluss.annotation.VisibleForTesting;
 import org.apache.fluss.client.Connection;
 import org.apache.fluss.client.table.Table;
@@ -38,10 +34,16 @@
 import org.apache.fluss.metadata.TableInfo;
 import org.apache.fluss.metadata.TablePath;
 import org.apache.fluss.utils.CloseableIterator;
+
+import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
+import org.apache.flink.connector.base.source.reader.splitreader.SplitReader;
+import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition;
+import org.apache.flink.connector.base.source.reader.splitreader.SplitsChange;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayDeque;

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/tiering/source/enumerator/TieringSourceEnumerator.java

Lines changed: 78 additions & 29 deletions
@@ -17,13 +17,6 @@
 
 package org.apache.fluss.flink.tiering.source.enumerator;
 
-import org.apache.flink.api.connector.source.ReaderInfo;
-import org.apache.flink.api.connector.source.SourceEvent;
-import org.apache.flink.api.connector.source.SplitEnumerator;
-import org.apache.flink.api.connector.source.SplitEnumeratorContext;
-import org.apache.flink.api.java.tuple.Tuple3;
-import org.apache.flink.metrics.groups.SplitEnumeratorMetricGroup;
-import org.apache.flink.util.FlinkRuntimeException;
 import org.apache.fluss.annotation.VisibleForTesting;
 import org.apache.fluss.client.Connection;
 import org.apache.fluss.client.ConnectionFactory;
@@ -48,10 +41,19 @@
 import org.apache.fluss.rpc.messages.PbLakeTieringTableInfo;
 import org.apache.fluss.rpc.metrics.ClientMetricGroup;
 import org.apache.fluss.utils.MapUtils;
+
+import org.apache.flink.api.connector.source.ReaderInfo;
+import org.apache.flink.api.connector.source.SourceEvent;
+import org.apache.flink.api.connector.source.SplitEnumerator;
+import org.apache.flink.api.connector.source.SplitEnumeratorContext;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.metrics.groups.SplitEnumeratorMetricGroup;
+import org.apache.flink.util.FlinkRuntimeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -102,7 +104,7 @@ public class TieringSourceEnumerator
 
     private final Map<Long, Long> tieringTableEpochs;
     private final Map<Long, Long> failedTableEpochs;
-    private final Map<Long, Long> finishedTableEpochs;
+    private final Map<Long, TieringFinishState> finishedTables;
     private final Set<Long> tieringReachMaxDurationsTables;
 
     // lazily instantiated
@@ -131,7 +133,7 @@ public TieringSourceEnumerator(
         this.pendingSplits = Collections.synchronizedList(new ArrayList<>());
         this.readersAwaitingSplit = Collections.synchronizedSet(new TreeSet<>());
         this.tieringTableEpochs = MapUtils.newConcurrentHashMap();
-        this.finishedTableEpochs = MapUtils.newConcurrentHashMap();
+        this.finishedTables = MapUtils.newConcurrentHashMap();
        this.failedTableEpochs = MapUtils.newConcurrentHashMap();
         this.tieringReachMaxDurationsTables = Collections.synchronizedSet(new TreeSet<>());
     }
@@ -179,8 +181,24 @@ public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname
         }
         LOG.info("TieringSourceReader {} requests split.", subtaskId);
         readersAwaitingSplit.add(subtaskId);
-        this.context.callAsync(
-                this::requestTieringTableSplitsViaHeartBeat, this::generateAndAssignSplits);
+
+        // If pending splits exist, assign them directly to the requesting reader
+        if (!pendingSplits.isEmpty()) {
+            assignSplits();
+        } else {
+            // Note: Ideally, only one table should be tiering at a time.
+            // Here we block to request a tiering table synchronously to avoid multiple threads
+            // requesting tiering tables concurrently, which would cause the enumerator to contain
+            // multiple tiering tables simultaneously. This is not optimal for tiering performance.
+            Tuple3<Long, Long, TablePath> tieringTable = null;
+            Throwable throwable = null;
+            try {
+                tieringTable = this.requestTieringTableSplitsViaHeartBeat();
+            } catch (Throwable t) {
+                throwable = t;
+            }
+            this.generateAndAssignSplits(tieringTable, throwable);
+        }
     }
 
     @Override
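The inline note above captures the design choice: with callAsync, several idle readers could each trigger a heartbeat and leave the enumerator tiering multiple tables at once, so the request is now issued synchronously on the coordinator thread. A toy illustration of the race being avoided, with a plain thread pool standing in for Flink's async callback machinery (all names here are invented for the sketch, not Fluss API):

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

// With async dispatch, each pending split request runs its own heartbeat and
// the enumerator can end up with two tiering tables; serializing the request
// on the coordinator thread keeps it to one table at a time.
public class SplitRequestRaceSketch {
    public static void main(String[] args) throws InterruptedException {
        List<String> tieringTables = new CopyOnWriteArrayList<>();
        ExecutorService asyncPool = Executors.newFixedThreadPool(2);
        for (int reader = 0; reader < 2; reader++) {
            int requestNo = reader;
            // callAsync-style path: both requests race and both fetch a table
            asyncPool.submit(() -> tieringTables.add("table-" + requestNo));
        }
        asyncPool.shutdown();
        asyncPool.awaitTermination(1, TimeUnit.SECONDS);
        // prints two tables tiering concurrently, the situation the
        // synchronous request in handleSplitRequest avoids
        System.out.println("tiering tables: " + tieringTables);
    }
}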
@@ -252,7 +270,9 @@ public void handleSourceEvent(int subtaskId, SourceEvent sourceEvent) {
                         "The finished table {} is not in tiering table, won't report it to Fluss to mark as finished.",
                         finishedTableId);
             } else {
-                finishedTableEpochs.put(finishedTableId, tieringEpoch);
+                boolean isForceComplete = tieringReachMaxDurationsTables.remove(finishedTableId);
+                finishedTables.put(
+                        finishedTableId, TieringFinishState.from(tieringEpoch, isForceComplete));
             }
         }
 
@@ -274,7 +294,7 @@ public void handleSourceEvent(int subtaskId, SourceEvent sourceEvent) {
             }
         }
 
-        if (!finishedTableEpochs.isEmpty() || !failedTableEpochs.isEmpty()) {
+        if (!finishedTables.isEmpty() || !failedTableEpochs.isEmpty()) {
             // call one round of heartbeat to notify table has been finished or failed
             this.context.callAsync(
                     this::requestTieringTableSplitsViaHeartBeat, this::generateAndAssignSplits);
@@ -288,6 +308,7 @@ private void handleSourceReaderFailOver() {
         // we need to make all as failed
         failedTableEpochs.putAll(new HashMap<>(tieringTableEpochs));
         tieringTableEpochs.clear();
+        tieringReachMaxDurationsTables.clear();
         // also clean all pending splits since we mark all as failed
         pendingSplits.clear();
         if (!failedTableEpochs.isEmpty()) {
@@ -298,22 +319,18 @@
     }
 
     @VisibleForTesting
-    protected void handleTableTieringReachMaxDuration(long tableId, long tieringEpoch) {
+    protected void handleTableTieringReachMaxDuration(
+            TablePath tablePath, long tableId, long tieringEpoch) {
         Long currentEpoch = tieringTableEpochs.get(tableId);
         if (currentEpoch != null && currentEpoch.equals(tieringEpoch)) {
-            LOG.info("Table {} reached max duration. Force completing.", tableId);
+            LOG.info("Table {}-{} reached max duration. Force completing.", tablePath, tableId);
             tieringReachMaxDurationsTables.add(tableId);
 
             for (TieringSplit tieringSplit : pendingSplits) {
                 if (tieringSplit.getTableBucket().getTableId() == tableId) {
                     // mark this tiering split to skip the current round since the tiering for
                     // this table has timed out, so the tiering source reader can skip them directly
                     tieringSplit.skipCurrentRound();
-                } else {
-                    // we can break directly, if found any one split's table id is not equal to the
-                    // timeout
-                    // table, the following split must be not equal to the table id
-                    break;
                 }
             }
 
@@ -362,13 +379,13 @@ private void assignSplits() {
         if (closed) {
             return null;
         }
-        Map<Long, Long> currentFinishedTableEpochs = new HashMap<>(this.finishedTableEpochs);
+        Map<Long, TieringFinishState> currentFinishedTables = new HashMap<>(this.finishedTables);
         Map<Long, Long> currentFailedTableEpochs = new HashMap<>(this.failedTableEpochs);
         LakeTieringHeartbeatRequest tieringHeartbeatRequest =
                 tieringTableHeartBeat(
                         basicHeartBeat(),
                         this.tieringTableEpochs,
-                        currentFinishedTableEpochs,
+                        currentFinishedTables,
                         currentFailedTableEpochs,
                         this.flussCoordinatorEpoch);
 
@@ -397,9 +414,9 @@
             waitHeartbeatResponse(coordinatorGateway.lakeTieringHeartbeat(tieringHeartbeatRequest));
         }
 
-        // if come to here, we can remove currentFinishedTableEpochs/failedTableEpochs to avoid send
+        // if come to here, we can remove currentFinishedTables/failedTableEpochs to avoid send
         // in next round
-        currentFinishedTableEpochs.forEach(finishedTableEpochs::remove);
+        currentFinishedTables.forEach(finishedTables::remove);
         currentFailedTableEpochs.forEach(failedTableEpochs::remove);
         return lakeTieringInfo;
     }
@@ -428,7 +445,7 @@ private void generateTieringSplits(Tuple3<Long, Long, TablePath> tieringTable)
             LOG.info(
                     "Generate Tiering splits for table {} is empty, no need to tier data.",
                     tieringTable.f2.getTableName());
-            finishedTableEpochs.put(tieringTable.f0, tieringTable.f1);
+            finishedTables.put(tieringTable.f0, TieringFinishState.from(tieringTable.f1));
         } else {
             tieringTableEpochs.put(tieringTable.f0, tieringTable.f1);
             pendingSplits.addAll(tieringSplits);
@@ -438,7 +455,9 @@
             context.runInCoordinatorThread(
                     () ->
                             handleTableTieringReachMaxDuration(
-                                    tieringTable.f0, tieringTable.f1)),
+                                    tablePath,
+                                    tieringTable.f0,
+                                    tieringTable.f1)),
 
             // for simplicity, we use the freshness as
             tableInfo.getTableConfig().getDataLakeFreshness().toMillis(),
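This hunk is where the commit title lands: the table's data-lake freshness interval doubles as the max tiering duration for a round. A minimal stand-alone sketch of the same timer pattern, using a ScheduledExecutorService in place of Flink's SplitEnumeratorContext callback registration (all names below are local to the sketch):

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Schedule a force-complete callback one freshness interval after tiering
// for a table starts, mirroring the registration above.
public class MaxTieringDurationSketch {
    public static void main(String[] args) throws InterruptedException {
        ScheduledExecutorService timer = Executors.newSingleThreadScheduledExecutor();
        long tableId = 42L;
        long tieringEpoch = 7L;
        // stand-in for tableInfo.getTableConfig().getDataLakeFreshness().toMillis()
        long freshnessMillis = 100L;

        timer.schedule(
                () ->
                        System.out.printf(
                                "table %d (epoch %d) reached max tiering duration, force completing%n",
                                tableId, tieringEpoch),
                freshnessMillis,
                TimeUnit.MILLISECONDS);

        // delayed tasks still run after shutdown by default; wait for them
        timer.shutdown();
        timer.awaitTermination(1, TimeUnit.SECONDS);
    }
}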
@@ -537,16 +556,28 @@ static LakeTieringHeartbeatRequest heartBeatWithRequestNewTieringTable(
     static LakeTieringHeartbeatRequest tieringTableHeartBeat(
             LakeTieringHeartbeatRequest heartbeatRequest,
             Map<Long, Long> tieringTableEpochs,
-            Map<Long, Long> finishedTableEpochs,
+            Map<Long, TieringFinishState> finishedTables,
             Map<Long, Long> failedTableEpochs,
             int coordinatorEpoch) {
         if (!tieringTableEpochs.isEmpty()) {
             heartbeatRequest.addAllTieringTables(
                     toPbHeartbeatReqForTable(tieringTableEpochs, coordinatorEpoch));
         }
-        if (!finishedTableEpochs.isEmpty()) {
+        if (!finishedTables.isEmpty()) {
+            Map<Long, Long> finishTieringEpochs = new HashMap<>();
+            Set<Long> forceFinishedTables = new HashSet<>();
+            finishedTables.forEach(
+                    (tableId, tieringFinishState) -> {
+                        finishTieringEpochs.put(tableId, tieringFinishState.tieringEpoch);
+                        if (tieringFinishState.isForceToFinish) {
+                            forceFinishedTables.add(tableId);
+                        }
+                    });
             heartbeatRequest.addAllFinishedTables(
-                    toPbHeartbeatReqForTable(finishedTableEpochs, coordinatorEpoch));
+                    toPbHeartbeatReqForTable(finishTieringEpochs, coordinatorEpoch));
+            for (long forceFinishedTableId : forceFinishedTables) {
+                heartbeatRequest.addForceFinishedTable(forceFinishedTableId);
+            }
         }
         // add failed tiering table to heart beat request
         return failedTableHeartBeat(heartbeatRequest, failedTableEpochs, coordinatorEpoch);
@@ -590,4 +621,22 @@
             }
         }
     }
+
+    private static class TieringFinishState {
+        long tieringEpoch;
+        boolean isForceToFinish;
+
+        public static TieringFinishState from(long tieringEpoch) {
+            return new TieringFinishState(tieringEpoch, false);
+        }
+
+        public static TieringFinishState from(long tieringEpoch, boolean isForceToFinish) {
+            return new TieringFinishState(tieringEpoch, isForceToFinish);
+        }
+
+        private TieringFinishState(long tieringEpoch, boolean isForceToFinish) {
+            this.tieringEpoch = tieringEpoch;
+            this.isForceToFinish = isForceToFinish;
+        }
+    }
 }
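TieringFinishState is what lets the coordinator tell a genuinely finished table from one that was force-completed on timeout: the force-finished subset travels in a separate heartbeat field so the coordinator can mark those tables as pending again, per the commit title. A self-contained sketch of that flattening step (the class and variable names below are invented for illustration; only addForceFinishedTable above is the real request field):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Flatten the finished-table map into the two collections the heartbeat
// carries: an epoch per finished table, plus the subset that was forced to
// finish (these are the tables the coordinator re-marks as pending).
public class FinishedTablesSketch {
    static final class FinishState {
        final long tieringEpoch;
        final boolean forcedByMaxDuration;

        FinishState(long tieringEpoch, boolean forcedByMaxDuration) {
            this.tieringEpoch = tieringEpoch;
            this.forcedByMaxDuration = forcedByMaxDuration;
        }
    }

    public static void main(String[] args) {
        Map<Long, FinishState> finishedTables = new HashMap<>();
        finishedTables.put(1L, new FinishState(10L, false)); // finished normally
        finishedTables.put(2L, new FinishState(11L, true));  // hit max duration

        Map<Long, Long> finishedEpochs = new HashMap<>();
        Set<Long> forceFinished = new HashSet<>();
        finishedTables.forEach(
                (tableId, state) -> {
                    finishedEpochs.put(tableId, state.tieringEpoch);
                    if (state.forcedByMaxDuration) {
                        forceFinished.add(tableId);
                    }
                });

        System.out.println("finished epochs: " + finishedEpochs); // {1=10, 2=11}
        System.out.println("pending again:   " + forceFinished);  // [2]
    }
}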

fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/tiering/FlinkTieringTestBase.java

Lines changed: 0 additions & 7 deletions
@@ -170,13 +170,6 @@ protected void writeRows(TablePath tablePath, List<InternalRow> rows, boolean ap
         }
     }
 
-    protected void waitUntilSnapshot(long tableId, int bucketNum, long snapshotId) {
-        for (int i = 0; i < bucketNum; i++) {
-            TableBucket tableBucket = new TableBucket(tableId, i);
-            FLUSS_CLUSTER_EXTENSION.waitUntilSnapshotFinished(tableBucket, snapshotId);
-        }
-    }
-
     public List<InternalRow> getValuesRecords(TablePath tablePath) {
         return TestingValuesLake.getResults(tablePath.toString());
     }
