@@ -25,7 +25,7 @@ use arrow::datatypes::SchemaRef;
25
25
use async_trait:: async_trait;
26
26
use datafusion_common:: Result ;
27
27
use datafusion_common:: { not_impl_err, Constraints , Statistics } ;
28
- use datafusion_expr:: Expr ;
28
+ use datafusion_expr:: { Expr , SortExpr } ;
29
29
30
30
use datafusion_expr:: dml:: InsertOp ;
31
31
use datafusion_expr:: {
@@ -171,6 +171,41 @@ pub trait TableProvider: Debug + Sync + Send {
171
171
limit : Option < usize > ,
172
172
) -> Result < Arc < dyn ExecutionPlan > > ;
173
173
174
+ /// Create an [`ExecutionPlan`] for scanning the table using structured arguments.
175
+ ///
176
+ /// This method uses [`ScanArgs`] to pass scan parameters in a structured way
177
+ /// and returns a [`ScanResult`] containing the execution plan. This approach
178
+ /// allows for extensible parameter passing and result handling.
179
+ ///
180
+ /// Table providers can override this method to take advantage of additional
181
+ /// parameters like `preferred_ordering` that may not be available through
182
+ /// other scan methods.
183
+ ///
184
+ /// # Arguments
185
+ /// * `state` - The session state containing configuration and context
186
+ /// * `args` - Structured scan arguments including projection, filters, limit, and ordering preferences
187
+ ///
188
+ /// # Returns
189
+ /// A [`ScanResult`] containing the [`ExecutionPlan`] for scanning the table
190
+ ///
191
+ /// See [`Self::scan`] for detailed documentation about projection, filters, and limits.
192
+ async fn scan_with_args (
193
+ & self ,
194
+ state : & dyn Session ,
195
+ args : ScanArgs ,
196
+ ) -> Result < ScanResult > {
197
+ let ScanArgs {
198
+ filters,
199
+ projection,
200
+ limit,
201
+ } = args;
202
+ let filters = filters. unwrap_or_default ( ) ;
203
+ let plan = self
204
+ . scan ( state, projection. as_ref ( ) , & filters, limit)
205
+ . await ?;
206
+ Ok ( ScanResult :: new ( plan) )
207
+ }
208
+
174
209
/// Specify if DataFusion should provide filter expressions to the
175
210
/// TableProvider to apply *during* the scan.
176
211
///
@@ -299,6 +334,119 @@ pub trait TableProvider: Debug + Sync + Send {
299
334
}
300
335
}
301
336
337
+ /// Arguments for scanning a table with [`TableProvider::scan_with_args`].
338
+ ///
339
+ /// `ScanArgs` provides a structured way to pass scan parameters to table providers,
340
+ /// replacing the multiple individual parameters used by [`TableProvider::scan`].
341
+ /// This struct uses the builder pattern for convenient construction.
342
+ ///
343
+ /// # Examples
344
+ ///
345
+ /// ```
346
+ /// # use datafusion_catalog::ScanArgs;
347
+ /// # use datafusion_expr::Expr;
348
+ /// let args = ScanArgs::default()
349
+ /// .with_projection(Some(vec![0, 2, 4]))
350
+ /// .with_limit(Some(1000));
351
+ /// ```
352
+ #[ derive( Debug , Clone , Default ) ]
353
+ pub struct ScanArgs {
354
+ filters : Option < Vec < Expr > > ,
355
+ projection : Option < Vec < usize > > ,
356
+ limit : Option < usize > ,
357
+ }
358
+
359
+ impl ScanArgs {
360
+ /// Set the column projection for the scan.
361
+ ///
362
+ /// The projection is a list of column indices from [`TableProvider::schema`]
363
+ /// that should be included in the scan results. If `None`, all columns are included.
364
+ ///
365
+ /// # Arguments
366
+ /// * `projection` - Optional list of column indices to project
367
+ pub fn with_projection ( mut self , projection : Option < Vec < usize > > ) -> Self {
368
+ self . projection = projection;
369
+ self
370
+ }
371
+
372
+ /// Get the column projection for the scan.
373
+ ///
374
+ /// Returns a cloned copy of the projection column indices, or `None` if
375
+ /// no projection was specified (meaning all columns should be included).
376
+ pub fn projection ( & self ) -> Option < Vec < usize > > {
377
+ self . projection . clone ( )
378
+ }
379
+
380
+ /// Set the filter expressions for the scan.
381
+ ///
382
+ /// Filters are boolean expressions that should be evaluated during the scan
383
+ /// to reduce the number of rows returned. All expressions are combined with AND logic.
384
+ /// Whether filters are actually pushed down depends on [`TableProvider::supports_filters_pushdown`].
385
+ ///
386
+ /// # Arguments
387
+ /// * `filters` - Optional list of filter expressions
388
+ pub fn with_filters ( mut self , filters : Option < Vec < Expr > > ) -> Self {
389
+ self . filters = filters;
390
+ self
391
+ }
392
+
393
+ /// Get the filter expressions for the scan.
394
+ ///
395
+ /// Returns a reference to the filter expressions, or `None` if no filters were specified.
396
+ pub fn filters ( & self ) -> Option < & [ Expr ] > {
397
+ self . filters . as_deref ( )
398
+ }
399
+
400
+ /// Set the maximum number of rows to return from the scan.
401
+ ///
402
+ /// If specified, the scan should return at most this many rows. This is typically
403
+ /// used to optimize queries with `LIMIT` clauses.
404
+ ///
405
+ /// # Arguments
406
+ /// * `limit` - Optional maximum number of rows to return
407
+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
408
+ self . limit = limit;
409
+ self
410
+ }
411
+
412
+ /// Get the maximum number of rows to return from the scan.
413
+ ///
414
+ /// Returns the row limit, or `None` if no limit was specified.
415
+ pub fn limit ( & self ) -> Option < usize > {
416
+ self . limit
417
+ }
418
+ }
419
+
420
+ /// Result of a table scan operation from [`TableProvider::scan_with_args`].
421
+ ///
422
+ /// `ScanResult` encapsulates the [`ExecutionPlan`] produced by a table scan,
423
+ /// providing a typed return value instead of returning the plan directly.
424
+ /// This allows for future extensibility of scan results without breaking
425
+ /// the API.
426
+ #[ derive( Debug , Clone ) ]
427
+ pub struct ScanResult {
428
+ /// The ExecutionPlan to run.
429
+ plan : Arc < dyn ExecutionPlan > ,
430
+ }
431
+
432
+ impl ScanResult {
433
+ /// Create a new `ScanResult` with the given execution plan.
434
+ ///
435
+ /// # Arguments
436
+ /// * `plan` - The execution plan that will perform the table scan
437
+ pub fn new ( plan : Arc < dyn ExecutionPlan > ) -> Self {
438
+ Self { plan }
439
+ }
440
+
441
+ /// Get the execution plan for this scan result.
442
+ ///
443
+ /// Returns a cloned reference to the [`ExecutionPlan`] that will perform
444
+ /// the actual table scanning and data retrieval.
445
+ pub fn plan ( & self ) -> Arc < dyn ExecutionPlan > {
446
+ Arc :: clone ( & self . plan )
447
+ }
448
+ }
449
+
302
450
/// A factory which creates [`TableProvider`]s at runtime given a URL.
303
451
///
304
452
/// For example, this can be used to create a table "on the fly"
0 commit comments