Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

- `<expr>$item()` to strictly extract a single value from an expression (#1652).

### Other changes

- In `<expr>$str$contains_any()` and `<expr>$str$replace_many()`, strings that
are passed to the `patterns` argument are now parsed as column names (as in
`<expr>$str$extract_many()`) (#1660).

## polars 1.6.0

This is an update that corresponds to Python Polars 1.35.2.
Expand Down
24 changes: 12 additions & 12 deletions R/expr-string.R
Original file line number Diff line number Diff line change
Expand Up @@ -1115,8 +1115,10 @@ expr_str_reverse <- function() {
#'
#' This function determines if any of the patterns find a match.
#' @inherit expr_str_contains params return
#' @param patterns Character vector or something can be coerced to strings [Expr]
#' of a valid regex pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
#' @param patterns String patterns to search. Accepts expression input. Strings
#' are parsed as column names, and other non-expression inputs are parsed as
#' literals. To use the same character vector for all rows, use
#' `list(c(...))` instead of `c(...)` (see Examples).
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this sentence because it would seem natural to use e.g. c("a", "b") to look for patterns "a" and "b" in all rows (this is what we did in most tests), but this behaviour will change. It is quite counter-intuitive to wrap this in list() (I didn't get this to work on the first try), so I think it's helpful to mention this here.

#' @param ascii_case_insensitive Enable ASCII-aware case insensitive matching.
#' When this option is enabled, searching will be performed without respect to
#' case for ASCII letters (a-z and A-Z) only.
Expand All @@ -1132,13 +1134,13 @@ expr_str_reverse <- function() {
#' )
#'
#' df$with_columns(
#' contains_any = pl$col("lyrics")$str$contains_any(c("you", "me"))
#' contains_any = pl$col("lyrics")$str$contains_any(list(c("you", "me")))
#' )
expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE) {
wrap({
check_dots_empty0(...)
self$`_rexpr`$str_contains_any(
as_polars_expr(patterns, as_lit = TRUE)$`_rexpr`,
as_polars_expr(patterns, as_lit = FALSE)$`_rexpr`,
Copy link
Collaborator Author

@etiennebacher etiennebacher Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ascii_case_insensitive
)
})
Expand All @@ -1150,7 +1152,7 @@ expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE)
#'
#' @inherit as_polars_expr return
#' @inheritParams rlang::args_dots_empty
#' @param patterns String patterns to search. Can be an Expr.
#' @inheritParams expr_str_contains_any
#' @param replace_with A vector of strings used as replacements. If this is of
#' length 1, then it is applied to all matches. Otherwise, it must be of same
#' length as the `patterns` argument.
Expand All @@ -1168,20 +1170,20 @@ expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE)
#'
#' # a replacement of length 1 is applied to all matches
#' df$with_columns(
#' remove_pronouns = pl$col("lyrics")$str$replace_many(c("you", "me"), "")
#' remove_pronouns = pl$col("lyrics")$str$replace_many(list(c("you", "me")), "")
#' )
#'
#' # if there are more than one replacement, the patterns and replacements are
#' # matched
#' df$with_columns(
#' fake_pronouns = pl$col("lyrics")$str$replace_many(c("you", "me"), c("foo", "bar"))
#' fake_pronouns = pl$col("lyrics")$str$replace_many(list(c("you", "me")), c("foo", "bar"))
#' )
expr_str_replace_many <- function(patterns, replace_with, ..., ascii_case_insensitive = FALSE) {
wrap({
check_dots_empty0(...)

self$`_rexpr`$str_replace_many(
as_polars_expr(patterns, as_lit = TRUE)$`_rexpr`,
as_polars_expr(patterns, as_lit = FALSE)$`_rexpr`,
Copy link
Collaborator Author

@etiennebacher etiennebacher Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as_polars_expr(replace_with, as_lit = TRUE)$`_rexpr`,
ascii_case_insensitive
)
Expand Down Expand Up @@ -1266,7 +1268,7 @@ expr_str_find <- function(pattern, ..., literal = FALSE, strict = TRUE) {
#'
#' @examples
#' df <- pl$DataFrame(values = "discontent")
#' patterns <- pl$lit(list(c("winter", "disco", "onte", "discontent")))
#' patterns <- list(c("winter", "disco", "onte", "discontent"))
#'
#' df$with_columns(
#' matches = pl$col("values")$str$find_many(patterns, overlapping = FALSE),
Expand Down Expand Up @@ -1374,8 +1376,6 @@ expr_str_tail <- function(n) {
#' `r lifecycle::badge("experimental")`
#' This method supports matching on string literals only,
#' and does not support regular expression matching.
#' @param patterns String patterns to search. This can be an Expr or something
#' coercible to an Expr. Strings are parsed as column names.
#' @inheritParams expr_str_contains_any
#' @inheritParams rlang::args_dots_empty
#' @param overlapping Whether matches can overlap.
Expand All @@ -1384,7 +1384,7 @@ expr_str_tail <- function(n) {
#'
#' @examples
#' df <- pl$DataFrame(values = "discontent")
#' patterns <- pl$lit(c("winter", "disco", "onte", "discontent"))
#' patterns <- list(c("winter", "disco", "onte", "discontent"))
#'
#' df$with_columns(
#' matches = pl$col("values")$str$extract_many(patterns),
Expand Down
8 changes: 5 additions & 3 deletions man/expr_str_contains_any.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/expr_str_extract_many.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions man/expr_str_find_many.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions man/expr_str_replace_many.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/_snaps/expr-string.md
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,8 @@
# str$replace_many

Code
dat$with_columns(pl$col("x")$str$replace_many(c("hi", "hello"), c("foo", "bar",
"foo2")))
dat$with_columns(pl$col("x")$str$replace_many(list(c("hi", "hello")), list(c(
"foo", "bar", "foo2"))))
Condition
Error in `dat$with_columns()`:
! Evaluation failed in `$with_columns()`.
Expand All @@ -444,8 +444,8 @@
---

Code
dat$with_columns(pl$col("x")$str$replace_many(c("hi", "hello", "good morning"),
c("foo", "bar")))
dat$with_columns(pl$col("x")$str$replace_many(list(c("hi", "hello",
"good morning")), list(c("foo", "bar"))))
Condition
Error in `dat$with_columns()`:
! Evaluation failed in `$with_columns()`.
Expand Down
20 changes: 12 additions & 8 deletions tests/testthat/test-expr-string.R
Original file line number Diff line number Diff line change
Expand Up @@ -828,14 +828,14 @@ test_that("str$reverse", {
test_that("str$contains_any", {
dat <- pl$DataFrame(x = c("HELLO there", "hi there", "good bye", NA))
expect_equal(
dat$with_columns(pl$col("x")$str$contains_any(c("hi", "hello"))),
dat$with_columns(pl$col("x")$str$contains_any(list(c("hi", "hello")))),
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the modified tests gave "invisible" warnings (i.e. they are not printed as warnings and don't even appear on the website), such as:

Deprecation: `str.contains_any` with a flat string datatype is deprecated.
Please use `implode` to return to previous behavior.
See [https://github.com/pola-rs/polars/issues/22149](https://github.com/pola-rs/polars/issues/22149) for more information.

pl$DataFrame(x = c(FALSE, TRUE, FALSE, NA))
)

# case insensitive
expect_equal(
dat$with_columns(
pl$col("x")$str$contains_any(c("hi", "hello"), ascii_case_insensitive = TRUE)
pl$col("x")$str$contains_any(list(c("hi", "hello")), ascii_case_insensitive = TRUE)
),
pl$DataFrame(x = c(TRUE, TRUE, FALSE, NA))
)
Expand All @@ -846,38 +846,42 @@ test_that("str$replace_many", {

expect_equal(
dat$with_columns(
pl$col("x")$str$replace_many(c("hi", "hello"), "foo")
pl$col("x")$str$replace_many(list(c("hi", "hello")), list("foo"))
),
pl$DataFrame(x = c("HELLO there", "foo there", "good bye", NA))
)

# case insensitive
expect_equal(
dat$with_columns(
pl$col("x")$str$replace_many(c("hi", "hello"), "foo", ascii_case_insensitive = TRUE)
pl$col("x")$str$replace_many(
list(c("hi", "hello")),
list("foo"),
ascii_case_insensitive = TRUE
)
),
pl$DataFrame(x = c("foo there", "foo there", "good bye", NA))
)

# identical lengths of patterns and replacements
expect_equal(
dat$with_columns(
pl$col("x")$str$replace_many(c("hi", "hello"), c("foo", "bar"))
pl$col("x")$str$replace_many(list(c("hi", "hello")), list(c("foo", "bar")))
),
pl$DataFrame(x = c("HELLO there", "foo there", "good bye", NA))
)

# error if different lengths
expect_snapshot(
dat$with_columns(
pl$col("x")$str$replace_many(c("hi", "hello"), c("foo", "bar", "foo2"))
pl$col("x")$str$replace_many(list(c("hi", "hello")), list(c("foo", "bar", "foo2")))
),
error = TRUE
)

expect_snapshot(
dat$with_columns(
pl$col("x")$str$replace_many(c("hi", "hello", "good morning"), c("foo", "bar"))
pl$col("x")$str$replace_many(list(c("hi", "hello", "good morning")), list(c("foo", "bar")))
),
error = TRUE
)
Expand Down Expand Up @@ -1037,7 +1041,7 @@ test_that("$str$tail works", {

test_that("$str$extract_many works", {
df <- pl$DataFrame(values = c("discontent", "dollar $"))
patterns <- pl$lit(c("winter", "disco", "ONTE", "discontent", "$"))
patterns <- c("winter", "disco", "ONTE", "discontent", "$")

expect_equal(
df$select(
Expand Down
Loading