Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
- `<expr>$name$replace()` to replace expression names using regular expressions (#1654).
- `<expr>$dt$days_in_month()` (#1659).
- `<expr>$rolling_rank()` and `<expr>$rolling_rank_by()` (#1656).
- `<expr>$arr$agg()` and `<expr>$list$agg()`, similar to their `$eval()` counterparts
but automatically explode the column if all elements return a scalar (#1655).

### Other changes

- `<expr>$list$eval()` now properly errors (as documented) if the input is not a Polars expression (#1655).

## polars 1.6.0

Expand Down
16 changes: 16 additions & 0 deletions R/000-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,13 @@ class(`PlRDataTypeExpr`) <- c("PlRDataTypeExpr__bundle", "savvy_polars__sealed")
}
}

`PlRExpr_arr_agg` <- function(self) {
function(`expr`) {
`expr` <- .savvy_extract_ptr(`expr`, "PlRExpr")
.savvy_wrap_PlRExpr(.Call(savvy_PlRExpr_arr_agg__impl, `self`, `expr`))
}
}

`PlRExpr_arr_all` <- function(self) {
function() {
.savvy_wrap_PlRExpr(.Call(savvy_PlRExpr_arr_all__impl, `self`))
Expand Down Expand Up @@ -2173,6 +2180,13 @@ class(`PlRDataTypeExpr`) <- c("PlRDataTypeExpr__bundle", "savvy_polars__sealed")
}
}

`PlRExpr_list_agg` <- function(self) {
function(`expr`) {
`expr` <- .savvy_extract_ptr(`expr`, "PlRExpr")
.savvy_wrap_PlRExpr(.Call(savvy_PlRExpr_list_agg__impl, `self`, `expr`))
}
}

`PlRExpr_list_all` <- function(self) {
function() {
.savvy_wrap_PlRExpr(.Call(savvy_PlRExpr_list_all__impl, `self`))
Expand Down Expand Up @@ -3474,6 +3488,7 @@ class(`PlRDataTypeExpr`) <- c("PlRDataTypeExpr__bundle", "savvy_polars__sealed")
e$`arg_min` <- `PlRExpr_arg_min`(ptr)
e$`arg_sort` <- `PlRExpr_arg_sort`(ptr)
e$`arg_unique` <- `PlRExpr_arg_unique`(ptr)
e$`arr_agg` <- `PlRExpr_arr_agg`(ptr)
e$`arr_all` <- `PlRExpr_arr_all`(ptr)
e$`arr_any` <- `PlRExpr_arr_any`(ptr)
e$`arr_arg_max` <- `PlRExpr_arr_arg_max`(ptr)
Expand Down Expand Up @@ -3630,6 +3645,7 @@ class(`PlRDataTypeExpr`) <- c("PlRDataTypeExpr__bundle", "savvy_polars__sealed")
e$`kurtosis` <- `PlRExpr_kurtosis`(ptr)
e$`last` <- `PlRExpr_last`(ptr)
e$`len` <- `PlRExpr_len`(ptr)
e$`list_agg` <- `PlRExpr_list_agg`(ptr)
e$`list_all` <- `PlRExpr_list_all`(ptr)
e$`list_any` <- `PlRExpr_list_any`(ptr)
e$`list_arg_max` <- `PlRExpr_list_arg_max`(ptr)
Expand Down
36 changes: 35 additions & 1 deletion R/expr-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,40 @@ expr_arr_len <- function() {
expr_arr_eval <- function(expr, ..., as_list = FALSE) {
wrap({
check_dots_empty0(...)
self$`_rexpr`$arr_eval(as_polars_expr(expr)$`_rexpr`, as_list)
check_polars_expr(expr)
self$`_rexpr`$arr_eval(expr$`_rexpr`, as_list)
})
}

#' Run any polars aggregation expression against the array's elements
#'
#' This looks similar to [`$arr$eval()`][expr_arr_eval], but the key
#' difference is that `$arr$agg()` automatically explodes the array if the
#' expression inside returns a scalar (while `$arr$eval()` always returns an
#' array).
#'
#' @inheritParams expr_list_eval
#'
#' @inherit as_polars_expr return
#' @examples
#' df <- pl$DataFrame(a = list(c(1, NA), c(42, 13), c(NA, NA)))$
#' cast(pl$Array(pl$Int64, 2))
#'
#' # The column "null_count" has dtype u32 because `$null_count()` returns a
#' # scalar for each sub-array. Using `$arr$eval()` instead would return a
#' # column with dtype arr(u32).
#' df$with_columns(
#' null_count = pl$col("a")$arr$agg(pl$element()$null_count())
#' )
#'
#' # The column "no_nulls" has dtype arr(u32) because the expression doesn't
#' # guarantee to return a scalar.
#' df$with_columns(
#' no_nulls = pl$col("a")$arr$agg(pl$element()$drop_nulls())
#' )
expr_arr_agg <- function(expr) {
wrap({
check_polars_expr(expr)
self$`_rexpr`$arr_agg(expr$`_rexpr`)
})
}
38 changes: 36 additions & 2 deletions R/expr-list.R
Original file line number Diff line number Diff line change
Expand Up @@ -514,8 +514,10 @@ expr_list_tail <- function(n = 5L) {
#' pl$col("b")$list$eval(pl$element()$str$join(" "))$list$first()
#' )
expr_list_eval <- function(expr) {
self$`_rexpr`$list_eval(as_polars_expr(expr)$`_rexpr`) |>
wrap()
wrap({
check_polars_expr(expr)
self$`_rexpr`$list_eval(expr$`_rexpr`)
})
}

#' Evaluate whether all boolean values in a sub-list are true
Expand Down Expand Up @@ -876,3 +878,35 @@ expr_list_count_matches <- function(element) {
self$`_rexpr`$list_count_matches(as_polars_expr(element, as_lit = TRUE)$`_rexpr`) |>
wrap()
}

#' Run any polars aggregation expression against the lists' elements
#'
#' This looks similar to [`$list$eval()`][expr_list_eval], but the key
#' difference is that `$list$agg()` automatically explodes the list if the
#' expression inside returns a scalar (while `$list$eval()` always returns a
#' list).
#'
#' @inheritParams expr_list_eval
#'
#' @inherit as_polars_expr return
#' @examples
#' df <- pl$DataFrame(a = list(c(1, NA), c(42, 13), c(NA, NA)))
#'
#' # The column "null_count" has dtype u32 because `$null_count()` returns a
#' # scalar for each sub-list. Using `$list$eval()` instead would return a
#' # column with dtype list(u32).
#' df$with_columns(
#' null_count = pl$col("a")$list$agg(pl$element()$null_count())
#' )
#'
#' # The column "no_nulls" has dtype list(u32) because the expression doesn't
#' # guarantee to return a scalar.
#' df$with_columns(
#' no_nulls = pl$col("a")$list$agg(pl$element()$drop_nulls())
#' )
expr_list_agg <- function(expr) {
wrap({
check_polars_expr(expr)
self$`_rexpr`$list_agg(expr$`_rexpr`)
})
}
2 changes: 2 additions & 0 deletions altdoc/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ nav:
- var: man/expr__var.md
- xor: man/expr__xor.md
- Array:
- arr_agg: man/expr_arr_agg.md
- arr_all: man/expr_arr_all.md
- arr_any: man/expr_arr_any.md
- arr_arg_max: man/expr_arr_arg_max.md
Expand Down Expand Up @@ -509,6 +510,7 @@ nav:
- time_ranges: man/pl__time_ranges.md
- when: man/pl__when.md
- List:
- list_agg: man/expr_list_agg.md
- list_all: man/expr_list_all.md
- list_any: man/expr_list_any.md
- list_arg_max: man/expr_list_arg_max.md
Expand Down
38 changes: 38 additions & 0 deletions man/expr_arr_agg.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions man/expr_list_agg.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,11 @@ SEXP savvy_PlRExpr_arg_unique__impl(SEXP self__) {
return handle_result(res);
}

SEXP savvy_PlRExpr_arr_agg__impl(SEXP self__, SEXP c_arg__expr) {
SEXP res = savvy_PlRExpr_arr_agg__ffi(self__, c_arg__expr);
return handle_result(res);
}

SEXP savvy_PlRExpr_arr_all__impl(SEXP self__) {
SEXP res = savvy_PlRExpr_arr_all__ffi(self__);
return handle_result(res);
Expand Down Expand Up @@ -1609,6 +1614,11 @@ SEXP savvy_PlRExpr_len__impl(SEXP self__) {
return handle_result(res);
}

SEXP savvy_PlRExpr_list_agg__impl(SEXP self__, SEXP c_arg__expr) {
SEXP res = savvy_PlRExpr_list_agg__ffi(self__, c_arg__expr);
return handle_result(res);
}

SEXP savvy_PlRExpr_list_all__impl(SEXP self__) {
SEXP res = savvy_PlRExpr_list_all__ffi(self__);
return handle_result(res);
Expand Down Expand Up @@ -3573,6 +3583,7 @@ static const R_CallMethodDef CallEntries[] = {
{"savvy_PlRExpr_arg_min__impl", (DL_FUNC) &savvy_PlRExpr_arg_min__impl, 1},
{"savvy_PlRExpr_arg_sort__impl", (DL_FUNC) &savvy_PlRExpr_arg_sort__impl, 3},
{"savvy_PlRExpr_arg_unique__impl", (DL_FUNC) &savvy_PlRExpr_arg_unique__impl, 1},
{"savvy_PlRExpr_arr_agg__impl", (DL_FUNC) &savvy_PlRExpr_arr_agg__impl, 2},
{"savvy_PlRExpr_arr_all__impl", (DL_FUNC) &savvy_PlRExpr_arr_all__impl, 1},
{"savvy_PlRExpr_arr_any__impl", (DL_FUNC) &savvy_PlRExpr_arr_any__impl, 1},
{"savvy_PlRExpr_arr_arg_max__impl", (DL_FUNC) &savvy_PlRExpr_arr_arg_max__impl, 1},
Expand Down Expand Up @@ -3731,6 +3742,7 @@ static const R_CallMethodDef CallEntries[] = {
{"savvy_PlRExpr_kurtosis__impl", (DL_FUNC) &savvy_PlRExpr_kurtosis__impl, 3},
{"savvy_PlRExpr_last__impl", (DL_FUNC) &savvy_PlRExpr_last__impl, 1},
{"savvy_PlRExpr_len__impl", (DL_FUNC) &savvy_PlRExpr_len__impl, 1},
{"savvy_PlRExpr_list_agg__impl", (DL_FUNC) &savvy_PlRExpr_list_agg__impl, 2},
{"savvy_PlRExpr_list_all__impl", (DL_FUNC) &savvy_PlRExpr_list_all__impl, 1},
{"savvy_PlRExpr_list_any__impl", (DL_FUNC) &savvy_PlRExpr_list_any__impl, 1},
{"savvy_PlRExpr_list_arg_max__impl", (DL_FUNC) &savvy_PlRExpr_list_arg_max__impl, 1},
Expand Down
2 changes: 2 additions & 0 deletions src/rust/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ SEXP savvy_PlRExpr_arg_max__ffi(SEXP self__);
SEXP savvy_PlRExpr_arg_min__ffi(SEXP self__);
SEXP savvy_PlRExpr_arg_sort__ffi(SEXP self__, SEXP c_arg__descending, SEXP c_arg__nulls_last);
SEXP savvy_PlRExpr_arg_unique__ffi(SEXP self__);
SEXP savvy_PlRExpr_arr_agg__ffi(SEXP self__, SEXP c_arg__expr);
SEXP savvy_PlRExpr_arr_all__ffi(SEXP self__);
SEXP savvy_PlRExpr_arr_any__ffi(SEXP self__);
SEXP savvy_PlRExpr_arr_arg_max__ffi(SEXP self__);
Expand Down Expand Up @@ -327,6 +328,7 @@ SEXP savvy_PlRExpr_item__ffi(SEXP self__, SEXP c_arg__allow_empty);
SEXP savvy_PlRExpr_kurtosis__ffi(SEXP self__, SEXP c_arg__fisher, SEXP c_arg__bias);
SEXP savvy_PlRExpr_last__ffi(SEXP self__);
SEXP savvy_PlRExpr_len__ffi(SEXP self__);
SEXP savvy_PlRExpr_list_agg__ffi(SEXP self__, SEXP c_arg__expr);
SEXP savvy_PlRExpr_list_all__ffi(SEXP self__);
SEXP savvy_PlRExpr_list_any__ffi(SEXP self__);
SEXP savvy_PlRExpr_list_arg_max__ffi(SEXP self__);
Expand Down
4 changes: 4 additions & 0 deletions src/rust/src/expr/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ impl PlRExpr {
Ok(self.inner.clone().arr().len().into())
}

fn arr_agg(&self, expr: &PlRExpr) -> Result<Self> {
Ok(self.inner.clone().arr().agg(expr.inner.clone()).into())
}

fn arr_eval(&self, expr: &PlRExpr, as_list: bool) -> Result<Self> {
Ok(self
.inner
Expand Down
4 changes: 4 additions & 0 deletions src/rust/src/expr/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,8 @@ impl PlRExpr {
.count_matches(expr.inner.clone())
.into())
}

fn list_agg(&self, expr: &PlRExpr) -> Result<Self> {
Ok(self.inner.clone().list().agg(expr.inner.clone()).into())
}
}
28 changes: 28 additions & 0 deletions tests/testthat/_snaps/expr-array.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,20 @@
Caused by error:
! Invalid operation: `array.eval` is not allowed with non-length preserving expressions. Enable `as_list` if you want to output a variable amount of items per row.

---

Code
df$select(pl$col("a")$arr$eval(1))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$select()`.
Caused by error in `pl$col("a")$arr$eval()`:
! Evaluation failed in `$eval()`.
Caused by error in `pl$col("a")$arr$eval()`:
! `expr` must be a polars expression, not the number 1.

---

Code
Expand All @@ -203,3 +217,17 @@
Caused by error:
! Invalid operation: `array.eval` is not allowed with non-length preserving expressions. Enable `as_list` if you want to output a variable amount of items per row.

# arr$agg() works

Code
df$select(pl$col("a")$arr$agg(1))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$select()`.
Caused by error in `pl$col("a")$arr$agg()`:
! Evaluation failed in `$agg()`.
Caused by error in `pl$col("a")$arr$agg()`:
! `expr` must be a polars expression, not the number 1.

28 changes: 28 additions & 0 deletions tests/testthat/_snaps/expr-list.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@
* ..1 = TRUE
i Did you forget to name an argument?

# eval

Code
df$with_columns(pl$concat_list("a", "b")$list$eval(1))
Condition
Error in `df$with_columns()`:
! Evaluation failed in `$with_columns()`.
Caused by error:
! Evaluation failed in `$with_columns()`.
Caused by error in `pl$concat_list("a", "b")$list$eval()`:
! Evaluation failed in `$eval()`.
Caused by error in `pl$concat_list("a", "b")$list$eval()`:
! `expr` must be a polars expression, not the number 1.

# $list$explode() works

Code
Expand Down Expand Up @@ -369,3 +383,17 @@
Output
col("foo").list.to_struct()

# list$agg() works

Code
df$select(pl$col("a")$list$agg(1))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$select()`.
Caused by error in `pl$col("a")$list$agg()`:
! Evaluation failed in `$agg()`.
Caused by error in `pl$col("a")$list$agg()`:
! `expr` must be a polars expression, not the number 1.

Loading