@@ -59,6 +59,8 @@ pub struct TableScanBuilder<'a> {
59
59
concurrency_limit_manifest_files : usize ,
60
60
row_group_filtering_enabled : bool ,
61
61
row_selection_enabled : bool ,
62
+
63
+ limit : Option < usize > ,
62
64
}
63
65
64
66
impl < ' a > TableScanBuilder < ' a > {
@@ -77,9 +79,16 @@ impl<'a> TableScanBuilder<'a> {
77
79
concurrency_limit_manifest_files : num_cpus,
78
80
row_group_filtering_enabled : true ,
79
81
row_selection_enabled : false ,
82
+ limit : None ,
80
83
}
81
84
}
82
85
86
+ /// Sets the maximum number of records to return
87
+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
88
+ self . limit = limit;
89
+ self
90
+ }
91
+
83
92
/// Sets the desired size of batches in the response
84
93
/// to something other than the default
85
94
pub fn with_batch_size ( mut self , batch_size : Option < usize > ) -> Self {
@@ -281,6 +290,7 @@ impl<'a> TableScanBuilder<'a> {
281
290
snapshot_schema : schema,
282
291
case_sensitive : self . case_sensitive ,
283
292
predicate : self . filter . map ( Arc :: new) ,
293
+ limit : self . limit ,
284
294
snapshot_bound_predicate : snapshot_bound_predicate. map ( Arc :: new) ,
285
295
object_cache : self . table . object_cache ( ) ,
286
296
field_ids : Arc :: new ( field_ids) ,
@@ -1406,6 +1416,130 @@ pub mod tests {
1406
1416
assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1407
1417
}
1408
1418
1419
+ #[ tokio:: test]
1420
+ async fn test_limit ( ) {
1421
+ let mut fixture = TableTestFixture :: new ( ) ;
1422
+ fixture. setup_manifest_files ( ) . await ;
1423
+
1424
+ let mut builder = fixture. table . scan ( ) ;
1425
+ builder = builder. with_limit ( Some ( 1 ) ) ;
1426
+ let table_scan = builder. build ( ) . unwrap ( ) ;
1427
+
1428
+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1429
+
1430
+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1431
+
1432
+ assert_eq ! ( batches. len( ) , 2 ) ;
1433
+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1434
+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1435
+
1436
+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1437
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1438
+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1439
+
1440
+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1441
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1442
+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1443
+
1444
+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1445
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1446
+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1447
+
1448
+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1449
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1450
+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1451
+ }
1452
+
1453
+ #[ tokio:: test]
1454
+ async fn test_limit_with_predicate ( ) {
1455
+ let mut fixture = TableTestFixture :: new ( ) ;
1456
+ fixture. setup_manifest_files ( ) . await ;
1457
+
1458
+ // Filter: y > 3
1459
+ let mut builder = fixture. table . scan ( ) ;
1460
+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1461
+ builder = builder. with_filter ( predicate) . with_limit ( Some ( 1 ) ) ;
1462
+ let table_scan = builder. build ( ) . unwrap ( ) ;
1463
+
1464
+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1465
+
1466
+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1467
+
1468
+ assert_eq ! ( batches. len( ) , 2 ) ;
1469
+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1470
+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1471
+
1472
+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1473
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1474
+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1475
+
1476
+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1477
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1478
+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1479
+ }
1480
+
1481
+ #[ tokio:: test]
1482
+ async fn test_limit_with_predicate_and_row_selection ( ) {
1483
+ let mut fixture = TableTestFixture :: new ( ) ;
1484
+ fixture. setup_manifest_files ( ) . await ;
1485
+
1486
+ // Filter: y > 3
1487
+ let mut builder = fixture. table . scan ( ) ;
1488
+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1489
+ builder = builder
1490
+ . with_filter ( predicate)
1491
+ . with_limit ( Some ( 1 ) )
1492
+ . with_row_selection_enabled ( true ) ;
1493
+ let table_scan = builder. build ( ) . unwrap ( ) ;
1494
+
1495
+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1496
+
1497
+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1498
+
1499
+ assert_eq ! ( batches. len( ) , 2 ) ;
1500
+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1501
+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1502
+
1503
+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1504
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1505
+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1506
+
1507
+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1508
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1509
+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1510
+ }
1511
+
1512
+ #[ tokio:: test]
1513
+ async fn test_limit_higher_than_total_rows ( ) {
1514
+ let mut fixture = TableTestFixture :: new ( ) ;
1515
+ fixture. setup_manifest_files ( ) . await ;
1516
+
1517
+ // Filter: y > 3
1518
+ let mut builder = fixture. table . scan ( ) ;
1519
+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1520
+ builder = builder
1521
+ . with_filter ( predicate)
1522
+ . with_limit ( Some ( 100_000_000 ) )
1523
+ . with_row_selection_enabled ( true ) ;
1524
+ let table_scan = builder. build ( ) . unwrap ( ) ;
1525
+
1526
+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1527
+
1528
+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1529
+
1530
+ assert_eq ! ( batches. len( ) , 2 ) ;
1531
+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 312 ) ;
1532
+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 312 ) ;
1533
+
1534
+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1535
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1536
+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1537
+
1538
+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1539
+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1540
+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1541
+ }
1542
+
1409
1543
#[ tokio:: test]
1410
1544
async fn test_filter_on_arrow_gt_eq ( ) {
1411
1545
let mut fixture = TableTestFixture :: new ( ) ;
@@ -1780,6 +1914,7 @@ pub mod tests {
1780
1914
record_count : Some ( 100 ) ,
1781
1915
data_file_format : DataFileFormat :: Parquet ,
1782
1916
deletes : vec ! [ ] ,
1917
+ limit : None ,
1783
1918
} ;
1784
1919
test_fn ( task) ;
1785
1920
@@ -1794,6 +1929,7 @@ pub mod tests {
1794
1929
record_count : None ,
1795
1930
data_file_format : DataFileFormat :: Avro ,
1796
1931
deletes : vec ! [ ] ,
1932
+ limit : None ,
1797
1933
} ;
1798
1934
test_fn ( task) ;
1799
1935
}
0 commit comments