Skip to content

Commit 293870f

Browse files
authored
feat: Implement argument matching for funnel (#546)
* feat: Implement argument matching for `funnel` * Text * 1 * 3 * 4 * 5 * 6 * 7 * 2
1 parent f5f9e2f commit 293870f

File tree

8 files changed

+43
-19
lines changed

8 files changed

+43
-19
lines changed

R/ducktbl.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
#' try(length(y$a))
5151
#' length(collect(y)$a)
5252
#' @export
53-
duckdb_tibble <- function(..., .funnel = "open") {
53+
duckdb_tibble <- function(..., .funnel = c("open", "drip", "closed")) {
5454
out <- tibble::tibble(...)
5555

5656
# Side effect: check compatibility
@@ -70,7 +70,7 @@ duckdb_tibble <- function(..., .funnel = "open") {
7070
#' @param x The object to convert or to test.
7171
#' @rdname duckdb_tibble
7272
#' @export
73-
as_duckdb_tibble <- function(x, ..., funnel = "open") {
73+
as_duckdb_tibble <- function(x, ..., funnel = c("open", "drip", "closed")) {
7474
# Handle the funnel arg in the generic, only the other args will be dispatched
7575
as_duckdb_tibble <- function(x, ...) {
7676
UseMethod("as_duckdb_tibble")
@@ -87,6 +87,7 @@ as_duckdb_tibble.tbl_duckdb_connection <- function(x, ...) {
8787
con <- dbplyr::remote_con(x)
8888
sql <- dbplyr::remote_query(x)
8989

90+
# Start restrictive to avoid accidental materialization
9091
read_sql_duckdb(sql, funnel = "closed", con = con)
9192
}
9293

R/funnel.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ new_duckdb_tibble <- function(x, class = NULL, funnel = "open", refunnel = FALSE
3030
}
3131

3232
class(x) <- c(
33-
if (!identical(funnel, "open")) "funneled_duckplyr_df",
33+
if (!identical(funnel_parsed$funnel, "open")) "funneled_duckplyr_df",
3434
"duckplyr_df",
3535
class
3636
)
@@ -78,6 +78,8 @@ funnel_parse <- function(funnel, call = caller_env()) {
7878
} else if (!is.character(funnel)) {
7979
cli::cli_abort("{.arg funnel} must be an unnamed character vector or a named numeric vector", call = call)
8080
} else {
81+
funnel <- arg_match(funnel, c("open", "closed", "drip"), error_call = call)
82+
8183
allow_materialization <- !identical(funnel, "closed")
8284
if (!allow_materialization) {
8385
n_cells <- 0

R/io2.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ NULL
1313
#'
1414
#' @rdname read_file_duckdb
1515
#' @export
16-
read_parquet_duckdb <- function(path, ..., funnel = "drip", options = list()) {
16+
read_parquet_duckdb <- function(path, ..., funnel = c("drip", "open", "closed"), options = list()) {
1717
check_dots_empty()
1818

1919
read_file_duckdb(path, "read_parquet", funnel = funnel, options = options)
@@ -50,7 +50,7 @@ read_parquet_duckdb <- function(path, ..., funnel = "drip", options = list()) {
5050
#' path,
5151
#' options = list(delim = ",", types = list(c("DOUBLE", "VARCHAR")))
5252
#' )
53-
read_csv_duckdb <- function(path, ..., funnel = "drip", options = list()) {
53+
read_csv_duckdb <- function(path, ..., funnel = c("drip", "open", "closed"), options = list()) {
5454
check_dots_empty()
5555

5656
read_file_duckdb(path, "read_csv_auto", funnel = funnel, options = options)
@@ -71,7 +71,7 @@ read_csv_duckdb <- function(path, ..., funnel = "drip", options = list()) {
7171
#' db_exec("INSTALL json")
7272
#' db_exec("LOAD json")
7373
#' read_json_duckdb(path)
74-
read_json_duckdb <- function(path, ..., funnel = "drip", options = list()) {
74+
read_json_duckdb <- function(path, ..., funnel = c("drip", "open", "closed"), options = list()) {
7575
check_dots_empty()
7676

7777
read_file_duckdb(path, "read_json", funnel = funnel, options = options)
@@ -104,7 +104,7 @@ read_file_duckdb <- function(
104104
path,
105105
table_function,
106106
...,
107-
funnel = "drip",
107+
funnel = c("drip", "open", "closed"),
108108
options = list()
109109
) {
110110
check_dots_empty()

R/sql.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#' @export
1919
#' @examples
2020
#' read_sql_duckdb("FROM duckdb_settings()")
21-
read_sql_duckdb <- function(sql, ..., funnel = "drip", con = NULL) {
21+
read_sql_duckdb <- function(sql, ..., funnel = c("drip", "open", "closed"), con = NULL) {
2222
if (!is_string(sql)) {
2323
cli::cli_abort("{.arg sql} must be a string.")
2424
}

man/duckdb_tibble.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/read_file_duckdb.Rd

Lines changed: 25 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/read_sql_duckdb.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/funnel.Rmd

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,17 +244,17 @@ In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to
244244

245245
## Partial funneling
246246

247-
Partial funneling is a compromise between funneling and unfunneling.
247+
Partial funneling is a compromise between funneled and unfunneled frames.
248248
Materialization is allowed for data up to a certain size, measured in cells (values) and rows in the resulting data frame.
249249

250250
```{r}
251251
nrow(flights)
252252
flights_partial <-
253253
flights |>
254-
duckplyr::as_duckdb_tibble(funnel = c(rows = 100000))
254+
duckplyr::as_duckdb_tibble(funnel = "drip")
255255
```
256256

257-
In this example, the data is materialized only if the result has fewer than 100,000 rows.
257+
With this setting, the data is materialized only if the result has fewer than 1,000,000 cells (rows multiplied by columns).
258258

259259
```{r error = TRUE}
260260
flights_partial |>
@@ -272,5 +272,5 @@ flights_partial |>
272272
```
273273

274274
Partial funneling is a good choice for data sets where the cost of materializing the data is prohibitive only for large results.
275-
The default for the ingestion functions like `read_parquet_duckdb()` is to limit the result size to one million cells (values in the resulting data frame).
275+
Partial funneling with `funnel = "drip"` is the default for the ingestion functions like `read_parquet_duckdb()`.
276276
See `vignette("large")` for more details on working with large data sets.

0 commit comments

Comments
 (0)