Skip to content

Commit c38dbc6

Browse files
authored
feat(r/sedonadb): Add basic DataFrame API with sd_select(), sd_transmute(), and sd_filter() (#499)
1 parent ace5542 commit c38dbc6

File tree

17 files changed

+299
-8
lines changed

17 files changed

+299
-8
lines changed

r/sedonadb/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,14 @@ export(sd_expr_factory)
4545
export(sd_expr_literal)
4646
export(sd_expr_negative)
4747
export(sd_expr_scalar_function)
48+
export(sd_filter)
4849
export(sd_preview)
4950
export(sd_read_parquet)
5051
export(sd_register_udf)
52+
export(sd_select)
5153
export(sd_sql)
5254
export(sd_to_view)
55+
export(sd_transmute)
5356
export(sd_view)
5457
export(sd_write_parquet)
5558
export(sedonadb_adbc)

r/sedonadb/R/000-wrappers.R

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/R/dataframe.R

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL
8080

8181
#' Count rows in a DataFrame
8282
#'
83-
#' @param .data A sedonadb_dataframe
83+
#' @param .data A sedonadb_dataframe or an object that can be coerced to one.
8484
#'
8585
#' @returns The number of rows after executing the query
8686
#' @export
@@ -89,6 +89,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL
8989
#' sd_sql("SELECT 1 as one") |> sd_count()
9090
#'
9191
sd_count <- function(.data) {
  # Coerce up front so anything as_sedonadb_dataframe() accepts can be counted
  sd_df <- as_sedonadb_dataframe(.data)
  sd_df$df$count()
}
9495

@@ -193,6 +194,91 @@ sd_preview <- function(.data, n = NULL, ascii = NULL, width = NULL) {
193194
invisible(.data)
194195
}
195196

197+
#' Keep or drop columns of a SedonaDB DataFrame
#'
#' Columns are chosen with tidyselect semantics and resolved against the
#' schema of `.data`.
#'
#' @inheritParams sd_count
#' @param ... One or more bare names. Evaluated like [dplyr::select()].
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10, y = letters[1:10]) |> sd_select(x)
#'
sd_select <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # tidyselect needs an R object with column names to select against, so
  # derive a prototype from the schema of the input
  prototype <- .data |>
    nanoarrow::infer_nanoarrow_schema() |>
    nanoarrow::infer_nanoarrow_ptype()
  selection <- tidyselect::eval_select(rlang::expr(c(...)), data = prototype)

  # names(selection) carries the (possibly renamed) output column names;
  # positions are shifted by one before being handed to select_indices()
  # (presumably it expects zero-based indices -- confirm against the wrapper)
  selected <- .data$df$select_indices(names(selection), selection - 1L)
  new_sedonadb_dataframe(.data$ctx, selected)
}
217+
218+
#' Create, modify, and delete columns of a SedonaDB DataFrame
#'
#' @inheritParams sd_count
#' @param ... Named expressions for new columns to create. These are evaluated
#'   in the same way as [dplyr::transmute()], except that extra dplyr features
#'   such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |>
#'   sd_transmute(y = x + 1L)
#'
sd_transmute <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)
  expr_quos <- rlang::enquos(...)

  # NOTE(review): only the bare expressions are kept below, so every
  # expression is evaluated against the caller's frame rather than the
  # environment captured by each quosure.
  env <- parent.frame()

  # Qualify the nanoarrow generic (as sd_select() does) so this does not
  # depend on a namespace import
  expr_ctx <- sd_expr_ctx(nanoarrow::infer_nanoarrow_schema(.data), env)

  # quos_auto_name() gives unnamed inputs a name derived from their expression
  r_exprs <- expr_quos |>
    rlang::quos_auto_name() |>
    lapply(rlang::quo_get_expr)
  sd_exprs <- lapply(r_exprs, sd_eval_expr, expr_ctx = expr_ctx, env = env)

  # Ensure inputs are given aliases to account for the expected column name
  exprs_names <- names(r_exprs)
  for (i in seq_along(sd_exprs)) {
    name <- exprs_names[i]
    if (!is.na(name) && name != "") {
      sd_exprs[[i]] <- sd_expr_alias(sd_exprs[[i]], name, expr_ctx$factory)
    }
  }

  df <- .data$df$select(sd_exprs)
  new_sedonadb_dataframe(.data$ctx, df)
}
253+
254+
#' Keep rows of a SedonaDB DataFrame that match a condition
#'
#' @inheritParams sd_count
#' @param ... Unnamed expressions for filter conditions. These are evaluated
#'   in the same way as [dplyr::filter()], except that extra dplyr features
#'   such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |> sd_filter(x > 5)
#'
sd_filter <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # A named argument is almost always `=` typed where `==` was intended
  rlang::check_dots_unnamed()

  expr_quos <- rlang::enquos(...)

  # NOTE(review): only the bare expressions are kept below, so every
  # condition is evaluated against the caller's frame rather than the
  # environment captured by each quosure.
  env <- parent.frame()

  # Qualify the nanoarrow generic (as sd_select() does) so this does not
  # depend on a namespace import
  expr_ctx <- sd_expr_ctx(nanoarrow::infer_nanoarrow_schema(.data), env)
  r_exprs <- expr_quos |> lapply(rlang::quo_get_expr)
  sd_exprs <- lapply(r_exprs, sd_eval_expr, expr_ctx = expr_ctx, env = env)

  df <- .data$df$filter(sd_exprs)
  new_sedonadb_dataframe(.data$ctx, df)
}
281+
196282
#' Write DataFrame to (Geo)Parquet files
197283
#'
198284
#' Write this DataFrame to one or more (Geo)Parquet files. For input that contains
@@ -246,6 +332,8 @@ sd_write_parquet <- function(
246332
geoparquet_version = "1.0",
247333
overwrite_bbox_columns = FALSE
248334
) {
335+
.data <- as_sedonadb_dataframe(.data)
336+
249337
# Determine single_file_output default based on path and partition_by
250338
if (is.null(single_file_output)) {
251339
single_file_output <- length(partition_by) == 0 && grepl("\\.parquet$", path)

r/sedonadb/R/expression.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ print.SedonaDBExpr <- function(x, ...) {
138138
#'
139139
#' @param expr An R expression (e.g., the result of `quote()`).
140140
#' @param expr_ctx An `sd_expr_ctx()`
141+
#' @param env An evaluation environment. Defaults to the calling environment.
141142
#'
142143
#' @returns A `SedonaDBExpr`
143144
#' @noRd

r/sedonadb/man/sd_compute.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/man/sd_count.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/man/sd_filter.Rd

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/man/sd_preview.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/man/sd_select.Rd

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/sedonadb/man/sd_to_view.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)