-
Notifications
You must be signed in to change notification settings - Fork 41
docs: Multiple fixes for Aho-Corasick functions #1660
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1115,8 +1115,10 @@ expr_str_reverse <- function() { | |
| #' | ||
| #' This function determines if any of the patterns find a match. | ||
| #' @inherit expr_str_contains params return | ||
| #' @param patterns Character vector or something can be coerced to strings [Expr] | ||
| #' of a valid regex pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/). | ||
| #' @param patterns String patterns to search. Accepts expression input. Strings | ||
| #' are parsed as column names, and other non-expression inputs are parsed as | ||
| #' literals. To use the same character vector for all rows, use | ||
| #' `list(c(...))` instead of `c(...)` (see Examples). | ||
| #' @param ascii_case_insensitive Enable ASCII-aware case insensitive matching. | ||
| #' When this option is enabled, searching will be performed without respect to | ||
| #' case for ASCII letters (a-z and A-Z) only. | ||
|
|
@@ -1132,13 +1134,13 @@ expr_str_reverse <- function() { | |
| #' ) | ||
| #' | ||
| #' df$with_columns( | ||
| #' contains_any = pl$col("lyrics")$str$contains_any(c("you", "me")) | ||
| #' contains_any = pl$col("lyrics")$str$contains_any(list(c("you", "me"))) | ||
| #' ) | ||
| expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE) { | ||
| wrap({ | ||
| check_dots_empty0(...) | ||
| self$`_rexpr`$str_contains_any( | ||
| as_polars_expr(patterns, as_lit = TRUE)$`_rexpr`, | ||
| as_polars_expr(patterns, as_lit = FALSE)$`_rexpr`, | ||
|
||
| ascii_case_insensitive | ||
| ) | ||
| }) | ||
|
|
@@ -1150,7 +1152,7 @@ expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE) | |
| #' | ||
| #' @inherit as_polars_expr return | ||
| #' @inheritParams rlang::args_dots_empty | ||
| #' @param patterns String patterns to search. Can be an Expr. | ||
| #' @inheritParams expr_str_contains_any | ||
| #' @param replace_with A vector of strings used as replacements. If this is of | ||
| #' length 1, then it is applied to all matches. Otherwise, it must be of same | ||
| #' length as the `patterns` argument. | ||
|
|
@@ -1168,20 +1170,20 @@ expr_str_contains_any <- function(patterns, ..., ascii_case_insensitive = FALSE) | |
| #' | ||
| #' # a replacement of length 1 is applied to all matches | ||
| #' df$with_columns( | ||
| #' remove_pronouns = pl$col("lyrics")$str$replace_many(c("you", "me"), "") | ||
| #' remove_pronouns = pl$col("lyrics")$str$replace_many(list(c("you", "me")), "") | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| #' ) | ||
| #' | ||
| #' # if there are more than one replacement, the patterns and replacements are | ||
| #' # matched | ||
| #' df$with_columns( | ||
| #' fake_pronouns = pl$col("lyrics")$str$replace_many(c("you", "me"), c("foo", "bar")) | ||
| #' fake_pronouns = pl$col("lyrics")$str$replace_many(list(c("you", "me")), c("foo", "bar")) | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| #' ) | ||
| expr_str_replace_many <- function(patterns, replace_with, ..., ascii_case_insensitive = FALSE) { | ||
| wrap({ | ||
| check_dots_empty0(...) | ||
|
|
||
| self$`_rexpr`$str_replace_many( | ||
| as_polars_expr(patterns, as_lit = TRUE)$`_rexpr`, | ||
| as_polars_expr(patterns, as_lit = FALSE)$`_rexpr`, | ||
|
||
| as_polars_expr(replace_with, as_lit = TRUE)$`_rexpr`, | ||
| ascii_case_insensitive | ||
| ) | ||
|
|
@@ -1266,7 +1268,7 @@ expr_str_find <- function(pattern, ..., literal = FALSE, strict = TRUE) { | |
| #' | ||
| #' @examples | ||
| #' df <- pl$DataFrame(values = "discontent") | ||
| #' patterns <- pl$lit(list(c("winter", "disco", "onte", "discontent"))) | ||
| #' patterns <- list(c("winter", "disco", "onte", "discontent")) | ||
| #' | ||
| #' df$with_columns( | ||
| #' matches = pl$col("values")$str$find_many(patterns, overlapping = FALSE), | ||
|
|
@@ -1374,8 +1376,6 @@ expr_str_tail <- function(n) { | |
| #' `r lifecycle::badge("experimental")` | ||
| #' This method supports matching on string literals only, | ||
| #' and does not support regular expression matching. | ||
| #' @param patterns String patterns to search. This can be an Expr or something | ||
| #' coercible to an Expr. Strings are parsed as column names. | ||
| #' @inheritParams expr_str_contains_any | ||
| #' @inheritParams rlang::args_dots_empty | ||
| #' @param overlapping Whether matches can overlap. | ||
|
|
@@ -1384,7 +1384,7 @@ expr_str_tail <- function(n) { | |
| #' | ||
| #' @examples | ||
| #' df <- pl$DataFrame(values = "discontent") | ||
| #' patterns <- pl$lit(c("winter", "disco", "onte", "discontent")) | ||
| #' patterns <- list(c("winter", "disco", "onte", "discontent")) | ||
| #' | ||
| #' df$with_columns( | ||
| #' matches = pl$col("values")$str$extract_many(patterns), | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -828,14 +828,14 @@ test_that("str$reverse", { | |
| test_that("str$contains_any", { | ||
| dat <- pl$DataFrame(x = c("HELLO there", "hi there", "good bye", NA)) | ||
| expect_equal( | ||
| dat$with_columns(pl$col("x")$str$contains_any(c("hi", "hello"))), | ||
| dat$with_columns(pl$col("x")$str$contains_any(list(c("hi", "hello")))), | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the modified tests gave "invisible" warnings (i.e. they are not printed as warnings and don't even appear on the website), such as: |
||
| pl$DataFrame(x = c(FALSE, TRUE, FALSE, NA)) | ||
| ) | ||
|
|
||
| # case insensitive | ||
| expect_equal( | ||
| dat$with_columns( | ||
| pl$col("x")$str$contains_any(c("hi", "hello"), ascii_case_insensitive = TRUE) | ||
| pl$col("x")$str$contains_any(list(c("hi", "hello")), ascii_case_insensitive = TRUE) | ||
| ), | ||
| pl$DataFrame(x = c(TRUE, TRUE, FALSE, NA)) | ||
| ) | ||
|
|
@@ -846,38 +846,42 @@ test_that("str$replace_many", { | |
|
|
||
| expect_equal( | ||
| dat$with_columns( | ||
| pl$col("x")$str$replace_many(c("hi", "hello"), "foo") | ||
| pl$col("x")$str$replace_many(list(c("hi", "hello")), list("foo")) | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ), | ||
| pl$DataFrame(x = c("HELLO there", "foo there", "good bye", NA)) | ||
| ) | ||
|
|
||
| # case insensitive | ||
| expect_equal( | ||
| dat$with_columns( | ||
| pl$col("x")$str$replace_many(c("hi", "hello"), "foo", ascii_case_insensitive = TRUE) | ||
| pl$col("x")$str$replace_many( | ||
| list(c("hi", "hello")), | ||
| list("foo"), | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ascii_case_insensitive = TRUE | ||
| ) | ||
| ), | ||
| pl$DataFrame(x = c("foo there", "foo there", "good bye", NA)) | ||
| ) | ||
|
|
||
| # identical lengths of patterns and replacements | ||
| expect_equal( | ||
| dat$with_columns( | ||
| pl$col("x")$str$replace_many(c("hi", "hello"), c("foo", "bar")) | ||
| pl$col("x")$str$replace_many(list(c("hi", "hello")), list(c("foo", "bar"))) | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ), | ||
| pl$DataFrame(x = c("HELLO there", "foo there", "good bye", NA)) | ||
| ) | ||
|
|
||
| # error if different lengths | ||
| expect_snapshot( | ||
| dat$with_columns( | ||
| pl$col("x")$str$replace_many(c("hi", "hello"), c("foo", "bar", "foo2")) | ||
| pl$col("x")$str$replace_many(list(c("hi", "hello")), list(c("foo", "bar", "foo2"))) | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ), | ||
| error = TRUE | ||
| ) | ||
|
|
||
| expect_snapshot( | ||
| dat$with_columns( | ||
| pl$col("x")$str$replace_many(c("hi", "hello", "good morning"), c("foo", "bar")) | ||
| pl$col("x")$str$replace_many(list(c("hi", "hello", "good morning")), list(c("foo", "bar"))) | ||
etiennebacher marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ), | ||
| error = TRUE | ||
| ) | ||
|
|
@@ -1037,7 +1041,7 @@ test_that("$str$tail works", { | |
|
|
||
| test_that("$str$extract_many works", { | ||
| df <- pl$DataFrame(values = c("discontent", "dollar $")) | ||
| patterns <- pl$lit(c("winter", "disco", "ONTE", "discontent", "$")) | ||
| patterns <- c("winter", "disco", "ONTE", "discontent", "$") | ||
etiennebacher marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| expect_equal( | ||
| df$select( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this sentence because it would seem natural to use e.g.
c("a", "b")to look for patterns "a" and "b" in all rows (this is what we did in most tests), but this behaviour will change. It is quite counter-intuitive to wrap this inlist()(I didn't get this to work on the first try), so I think it's helpful to mention this here.