Skip to content

Commit 8b7f6d7

Browse files
authored
Spark 4.1: New Async Spark Micro Batch Planner (apache#15299)
1 parent bb8b743 commit 8b7f6d7

File tree

10 files changed

+743
-13
lines changed

10 files changed

+743
-13
lines changed

docs/docs/spark-configuration.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ val spark = SparkSession.builder()
196196
| spark.sql.iceberg.executor-cache.locality.enabled | false | Enables locality-aware executor cache usage |
197197
| spark.sql.iceberg.merge-schema | false | Enables modifying the table schema to match the write schema. Only adds missing columns |
198198
| spark.sql.iceberg.report-column-stats | true | Report Puffin Table Statistics if available to Spark's Cost Based Optimizer. CBO must be enabled for this to be effective |
199+
| spark.sql.iceberg.async-micro-batch-planning-enabled | false | Enables asynchronous microbatch planning to reduce planning latency by pre-fetching file scan tasks |
199200

200201
### Read options
201202

@@ -220,6 +221,10 @@ spark.read
220221
| stream-from-timestamp | (none) | A timestamp in milliseconds to stream from; if before the oldest known ancestor snapshot, the oldest will be used |
221222
| streaming-max-files-per-micro-batch | INT_MAX | Maximum number of files per microbatch |
222223
| streaming-max-rows-per-micro-batch | INT_MAX | "Soft maximum" number of rows per microbatch; always includes all rows in next unprocessed file, excludes additional files if their inclusion would exceed the soft max limit |
224+
| async-micro-batch-planning-enabled | false | Enables asynchronous microbatch planning to reduce planning latency by pre-fetching file scan tasks |
225+
| streaming-snapshot-polling-interval-ms | 30000 | Overrides the polling interval used by the async planner to refresh the table and detect new snapshots. Only applies when async-micro-batch-planning-enabled is set |
226+
| async-queue-preload-file-limit | 100 | Overrides the number of files initially loaded into the background queue. Tune to prevent queue starvation. Only applies when async-micro-batch-planning-enabled is set |
227+
| async-queue-preload-row-limit | 100000 | Overrides the number of rows initially loaded into the background queue. Tune to prevent queue starvation. Only applies when async-micro-batch-planning-enabled is set |
223228

224229
### Write options
225230

docs/docs/spark-structured-streaming.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ val df = spark.readStream
6363
!!! info
6464
Note: In addition to limiting micro-batch sizes on queries that use the default trigger (i.e. `Trigger.ProcessingTime`), rate limiting options can be applied to queries that use `Trigger.AvailableNow` to split one-time processing of all available source data into multiple micro-batches for better query scalability. Rate limiting options will be ignored when using the deprecated `Trigger.Once` trigger.
6565

66+
### Asynchronous Micro-Batch Planning
67+
68+
Users can enable asynchronous micro-batch planning by setting `async-micro-batch-planning-enabled` to true. With this option enabled, Iceberg will start processing the current micro-batch while planning the next micro-batches in parallel.
69+
This can help improve query throughput by reducing idle time between micro-batches. Users should weigh the tradeoffs, which include higher memory usage and increased snapshot detection latency.
70+
71+
Users can also set additional options to control the behavior of asynchronous micro-batch planning, found in the [spark configuration](spark-configuration.md#read-options).
72+
6673
## Streaming Writes
6774

6875
To write values from streaming query to Iceberg table, use `DataStreamWriter`:

spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,39 @@ public int maxRecordsPerMicroBatch() {
225225
.parse();
226226
}
227227

228+
public boolean asyncMicroBatchPlanningEnabled() {
229+
return confParser
230+
.booleanConf()
231+
.option(SparkReadOptions.ASYNC_MICRO_BATCH_PLANNING_ENABLED)
232+
.sessionConf(SparkSQLProperties.ASYNC_MICRO_BATCH_PLANNING_ENABLED)
233+
.defaultValue(SparkSQLProperties.ASYNC_MICRO_BATCH_PLANNING_ENABLED_DEFAULT)
234+
.parse();
235+
}
236+
237+
public long streamingSnapshotPollingIntervalMs() {
238+
return confParser
239+
.longConf()
240+
.option(SparkReadOptions.STREAMING_SNAPSHOT_POLLING_INTERVAL_MS)
241+
.defaultValue(SparkReadOptions.STREAMING_SNAPSHOT_POLLING_INTERVAL_MS_DEFAULT)
242+
.parse();
243+
}
244+
245+
public long asyncQueuePreloadFileLimit() {
246+
return confParser
247+
.longConf()
248+
.option(SparkReadOptions.ASYNC_QUEUE_PRELOAD_FILE_LIMIT)
249+
.defaultValue(SparkReadOptions.ASYNC_QUEUE_PRELOAD_FILE_LIMIT_DEFAULT)
250+
.parse();
251+
}
252+
253+
public long asyncQueuePreloadRowLimit() {
254+
return confParser
255+
.longConf()
256+
.option(SparkReadOptions.ASYNC_QUEUE_PRELOAD_ROW_LIMIT)
257+
.defaultValue(SparkReadOptions.ASYNC_QUEUE_PRELOAD_ROW_LIMIT_DEFAULT)
258+
.parse();
259+
}
260+
228261
public boolean preserveDataGrouping() {
229262
return confParser
230263
.booleanConf()

spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkReadOptions.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,19 @@ private SparkReadOptions() {}
8484
public static final String STREAMING_MAX_ROWS_PER_MICRO_BATCH =
8585
"streaming-max-rows-per-micro-batch";
8686

87+
// Enables asynchronous micro-batch planning for streaming reads (per-read option;
// a session-level equivalent exists in SparkSQLProperties).
public static final String ASYNC_MICRO_BATCH_PLANNING_ENABLED =
    "async-micro-batch-planning-enabled";

// Polling interval, in milliseconds, at which the async planner refreshes table
// metadata to detect new snapshots; only applies when async planning is enabled.
public static final String STREAMING_SNAPSHOT_POLLING_INTERVAL_MS =
    "streaming-snapshot-polling-interval-ms";
public static final long STREAMING_SNAPSHOT_POLLING_INTERVAL_MS_DEFAULT = 30000L;

// Initial preload limits (files / rows) for the async planner's background queue;
// tune upward to prevent queue starvation. Only apply when async planning is enabled.
public static final String ASYNC_QUEUE_PRELOAD_FILE_LIMIT = "async-queue-preload-file-limit";
public static final long ASYNC_QUEUE_PRELOAD_FILE_LIMIT_DEFAULT = 100L;
public static final String ASYNC_QUEUE_PRELOAD_ROW_LIMIT = "async-queue-preload-row-limit";
public static final long ASYNC_QUEUE_PRELOAD_ROW_LIMIT_DEFAULT = 100000L;
99+
87100
// Table path
88101
public static final String PATH = "path";
89102

spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/SparkSQLProperties.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,9 @@ private SparkSQLProperties() {}
112112

113113
// Prefix for custom snapshot properties
114114
public static final String SNAPSHOT_PROPERTY_PREFIX = "spark.sql.iceberg.snapshot-property.";
115+
116+
// Controls whether async micro-batch planning is enabled for the Spark session;
// the per-read option of the same name (without the prefix) takes precedence.
public static final String ASYNC_MICRO_BATCH_PLANNING_ENABLED =
    "spark.sql.iceberg.async-micro-batch-planning-enabled";
public static final boolean ASYNC_MICRO_BATCH_PLANNING_ENABLED_DEFAULT = false;
115120
}

0 commit comments

Comments
 (0)