@@ -80,7 +80,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL
8080
8181# ' Count rows in a DataFrame
8282# '
83- # ' @param .data A sedonadb_dataframe
83+ # ' @param .data A sedonadb_dataframe or an object that can be coerced to one.
8484# '
8585# ' @returns The number of rows after executing the query
8686# ' @export
@@ -89,6 +89,7 @@ as_sedonadb_dataframe.datafusion_table_provider <- function(x, ..., schema = NUL
8989# ' sd_sql("SELECT 1 as one") |> sd_count()
9090# '
sd_count <- function(.data) {
  # Coerce first so that anything as_sedonadb_dataframe() accepts can be counted
  sedona_df <- as_sedonadb_dataframe(.data)
  sedona_df$df$count()
}
9495
@@ -193,6 +194,91 @@ sd_preview <- function(.data, n = NULL, ascii = NULL, width = NULL) {
193194 invisible (.data )
194195}
195196
#' Keep or drop columns of a SedonaDB DataFrame
#'
#' @inheritParams sd_count
#' @param ... One or more bare column names. These are evaluated with
#'   tidyselect in the same way as [dplyr::select()].
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10, y = letters[1:10]) |> sd_select(x)
#'
sd_select <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)

  # Resolve the selection against an R prototype derived from the schema
  prototype <- .data |>
    nanoarrow::infer_nanoarrow_schema() |>
    nanoarrow::infer_nanoarrow_ptype()
  selected <- tidyselect::eval_select(rlang::expr(c(...)), data = prototype)

  # eval_select() yields a named vector of 1-based positions; they are shifted
  # down by one before being handed to select_indices() (presumably zero-based)
  projected <- .data$df$select_indices(names(selected), selected - 1L)
  new_sedonadb_dataframe(.data$ctx, projected)
}
217+
#' Create, modify, and delete columns of a SedonaDB DataFrame
#'
#' @inheritParams sd_count
#' @param ... Named expressions for new columns to create. These are evaluated
#'   in the same way as [dplyr::transmute()], except that extra dplyr features
#'   such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |>
#'   sd_transmute(y = x + 1L)
#'
sd_transmute <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)
  expr_quos <- rlang::enquos(...)
  env <- parent.frame()

  # Namespace-qualified to match sd_select() rather than relying on an import
  schema <- nanoarrow::infer_nanoarrow_schema(.data)
  expr_ctx <- sd_expr_ctx(schema, env)

  # Unnamed inputs receive an automatic name; only the bare expressions are
  # kept and passed on to sd_eval_expr() together with the caller's frame
  r_exprs <- expr_quos |>
    rlang::quos_auto_name() |>
    lapply(rlang::quo_get_expr)
  sd_exprs <- lapply(r_exprs, sd_eval_expr, expr_ctx = expr_ctx, env = env)

  # Ensure inputs are given aliases to account for the expected column name
  col_names <- names(r_exprs)
  for (i in seq_along(sd_exprs)) {
    col_name <- col_names[i]
    if (!is.na(col_name) && nzchar(col_name)) {
      sd_exprs[[i]] <- sd_expr_alias(sd_exprs[[i]], col_name, expr_ctx$factory)
    }
  }

  df <- .data$df$select(sd_exprs)
  new_sedonadb_dataframe(.data$ctx, df)
}
253+
#' Keep rows of a SedonaDB DataFrame that match a condition
#'
#' @inheritParams sd_count
#' @param ... Unnamed expressions for filter conditions. These are evaluated
#'   in the same way as [dplyr::filter()], except that extra dplyr features
#'   such as `across()` or `.by` are not supported.
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' data.frame(x = 1:10) |> sd_filter(x > 5)
#'
sd_filter <- function(.data, ...) {
  .data <- as_sedonadb_dataframe(.data)
  # Named arguments are rejected up front
  rlang::check_dots_unnamed()

  expr_quos <- rlang::enquos(...)
  env <- parent.frame()

  # Namespace-qualified to match sd_select() rather than relying on an import
  schema <- nanoarrow::infer_nanoarrow_schema(.data)
  expr_ctx <- sd_expr_ctx(schema, env)

  # Only the bare expressions are kept; they are passed on to sd_eval_expr()
  # together with the caller's frame
  r_exprs <- lapply(expr_quos, rlang::quo_get_expr)
  sd_exprs <- lapply(r_exprs, sd_eval_expr, expr_ctx = expr_ctx, env = env)

  df <- .data$df$filter(sd_exprs)
  new_sedonadb_dataframe(.data$ctx, df)
}
281+
196282# ' Write DataFrame to (Geo)Parquet files
197283# '
198284# ' Write this DataFrame to one or more (Geo)Parquet files. For input that contains
@@ -246,6 +332,8 @@ sd_write_parquet <- function(
246332 geoparquet_version = " 1.0" ,
247333 overwrite_bbox_columns = FALSE
248334) {
335+ .data <- as_sedonadb_dataframe(.data )
336+
249337 # Determine single_file_output default based on path and partition_by
250338 if (is.null(single_file_output )) {
251339 single_file_output <- length(partition_by ) == 0 && grepl(" \\ .parquet$" , path )
0 commit comments