From a6dcc4647267e498a1b638d8b276b8608d3a9c99 Mon Sep 17 00:00:00 2001
From: xiaodaigh
Date: Mon, 9 Mar 2020 11:11:23 +1100
Subject: [PATCH 1/2] init

---
Review notes (everything between "---" and the first "diff --git" is
commentary and is not part of the commit):

* NOTE(review): NAMESPACE gains importFrom(tidytable, ...) while the
  DESCRIPTION hunk adds tidytable under the field named in its hunk header
  (Suggests). A namespace import normally requires the package to be listed
  in Imports or Depends; the Imports field is not visible in this patch, so
  confirm and move the entry if needed.
* Changed in review: `overwrite = T` -> `overwrite = TRUE`; scalar
  comparisons use expect_equal() instead of expect_setequal(); the comment in
  the #191 regression test now describes the actual failure trigger; a short
  header comment was added to R/tidytable.r (hunk and diffstat counts updated).
* The "rename", "transmute", "arrange" and "chunk_summarise" tests contain
  only commented-out code and therefore assert nothing.
* `df_orig` is computed but never compared in "testing filter - global vars",
  "testing mutate" and "testing mutate user-defined function".
* The blob ids on the "index" lines predate these review edits; regenerate the
  series with `git format-patch` after applying.

 DESCRIPTION                     |   3 +-
 NAMESPACE                       |   6 +
 R/tidytable.r                   |  14 ++
 tests/testthat/test-tidytable.r | 196 ++++++++++++++++++++++++++++++++
 4 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 R/tidytable.r
 create mode 100644 tests/testthat/test-tidytable.r

diff --git a/DESCRIPTION b/DESCRIPTION
index 457262cf..730d6a86 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -48,7 +48,8 @@ Suggests:
     biglmm,
     speedglm,
     broom,
-    ggplot2
+    ggplot2,
+    tidytable(>= 0.3.2)
 LinkingTo:
     Rcpp
 RoxygenNote: 7.0.2
diff --git a/NAMESPACE b/NAMESPACE
index 6f3fa037..c014a679 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,9 @@ S3method(compute,disk.frame)
 S3method(delayed,disk.frame)
 S3method(distinct,disk.frame)
 S3method(do,disk.frame)
+S3method(dt_filter,disk.frame)
+S3method(dt_mutate,disk.frame)
+S3method(dt_select,disk.frame)
 S3method(filter,disk.frame)
 S3method(full_join,disk.frame)
 S3method(get_chunk,disk.frame)
@@ -283,6 +286,9 @@ importFrom(stats,median)
 importFrom(stats,quantile)
 importFrom(stats,runif)
 importFrom(stringr,fixed)
+importFrom(tidytable,dt_filter)
+importFrom(tidytable,dt_mutate)
+importFrom(tidytable,dt_select)
 importFrom(utils,capture.output)
 importFrom(utils,head)
 importFrom(utils,memory.limit)
diff --git a/R/tidytable.r b/R/tidytable.r
new file mode 100644
index 00000000..dec52905
--- /dev/null
+++ b/R/tidytable.r
@@ -0,0 +1,14 @@
+# disk.frame methods for the tidytable dt_* verbs. Each one is produced by
+# handing the tidytable verb to create_chunk_mapper() with as.data.frame = FALSE.
+
+#' @importFrom tidytable dt_filter
+#' @export
+dt_filter.disk.frame <- create_chunk_mapper(tidytable::dt_filter, as.data.frame=FALSE)
+
+#' @importFrom tidytable dt_mutate
+#' @export
+dt_mutate.disk.frame <- create_chunk_mapper(tidytable::dt_mutate, as.data.frame=FALSE)
+
+#' @importFrom tidytable dt_select
+#' @export
+dt_select.disk.frame <- create_chunk_mapper(tidytable::dt_select, as.data.frame=FALSE)
diff --git a/tests/testthat/test-tidytable.r b/tests/testthat/test-tidytable.r
new file mode 100644
index 00000000..e6b73c5d
--- /dev/null
+++ b/tests/testthat/test-tidytable.r
@@ -0,0 +1,196 @@
+context("test-tidytable-verbs")
+
+setup({
+  b = data.frame(a = 51:150, b = 1:100)
+  as.disk.frame(b, file.path(tempdir(), "tmp_b_dv.df"), nchunks = 5, overwrite = TRUE)
+})
+
+test_that("testing select", {
+  b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+
+  df = b %>%
+    dt_select(a) %>%
+    collect
+
+  expect_equal(ncol(df), 1)
+})
+
+test_that("testing rename", {
+  # b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+  #
+  # df = b %>%
+  #   rename(a_new_name = a) %>%
+  #   collect
+  #
+  # expect_setequal(colnames(df), c("a_new_name", "b"))
+})
+
+test_that("testing filter", {
+  b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+
+  df = b %>%
+    dt_filter(a <= 100, b <= 10) %>%
+    collect
+
+  expect_equal(nrow(df), 10)
+})
+
+test_that("testing filter - global vars", {
+  b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+
+  one_hundred = 100
+
+  df = b %>%
+    dt_filter(a <= one_hundred, b <= 10) %>%
+    collect
+
+  df_orig = b %>%
+    filter(a <= one_hundred, b <= 10) %>%
+    collect
+
+  expect_equal(nrow(df), 10)
+})
+
+test_that("testing mutate", {
+  library(disk.frame)
+  setup_disk.frame()
+  library(testthat)
+  library(tidytable)
+  b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+
+  df_orig = b %>%
+    dt_mutate(d = a + b) %>%
+    collect
+
+  df = b %>%
+    dt_mutate(d = a + b) %>%
+    collect
+
+  expect_equal(sum(df$d), sum(df$a, df$b))
+
+  df = b %>%
+    dt_mutate(e = rank(desc(a))) %>%
+    collect
+
+  expect_equal(nrow(df), 100)
+
+  # need to test
+  value <- as.disk.frame(tibble(char = LETTERS,
+                                num = 1:26))
+  df2 = value %>%
+    dt_mutate(b = case_when(
+      char %in% c("A", "B", "C") ~ "1",
+      TRUE ~ char)) %>%
+    collect
+
+  expect_equal(ncol(df2), 3)
+
+  # testing
+  fn = function(a, b) {
+    a+b
+  }
+
+  df3 = value %>%
+    dt_mutate(b = fn(num, num)) %>%
+    collect
+
+  expect_equal(ncol(df3), 3)
+
+
+  global_var = 100
+
+  df4 = value %>%
+    dt_mutate(b = fn(num, num), d = global_var*2) %>%
+    collect
+
+  expect_equal(ncol(df4), 4)
+  expect_true(all(df4$d == 200))
+})
+
+test_that("testing mutate user-defined function", {
+  b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+
+
+  udf = function(a1, b1) {
+    a1 + b1
+  }
+
+  df = b %>%
+    dt_mutate(d = udf(a,b)) %>%
+    collect
+
+  df_orig = b %>%
+    mutate(d = udf(a,b)) %>%
+    collect
+
+  expect_equal(sum(df$d), sum(df$a, df$b))
+})
+
+test_that("testing transmute", {
+  # b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+  #
+  # df = b %>%
+  #   transmute(d = a + b) %>%
+  #   collect
+  #
+  # expect_setequal(names(df), c("d"))
+})
+
+test_that("testing arrange", {
+  # b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+  #
+  # expect_warning(df <- b %>%
+  #   mutate(random_unif = runif(dplyr::n())) %>%
+  #   arrange(desc(random_unif)))
+  #
+  # df <- b %>%
+  #   mutate(random_unif = runif(dplyr::n())) %>%
+  #   chunk_arrange(desc(random_unif))
+  #
+  # x = purrr::map_lgl(1:nchunks(df), ~{
+  #   is.unsorted(.x) == FALSE
+  # })
+  #
+  # expect_true(all(x))
+})
+
+test_that("testing chunk_summarise", {
+  # b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
+  #
+  # df = b %>%
+  #   chunk_summarise(suma = sum(a)) %>%
+  #   collect %>%
+  #   summarise(suma = sum(suma))
+  #
+  # expect_equal(df$suma, collect(b)$a %>% sum)
+})
+
+test_that("testing mutate within function works", {
+  test_f <- function(params, x_df){
+    x_df %>% mutate(aha = params[1]*cyl + params[2]*disp)
+  }
+
+  expect_true("aha" %in% names(test_f(c(1, 2), mtcars)))
+
+  test_f <- function(params, x_df){
+    x_df %>% dt_mutate(aha = params[1]*cyl + params[2]*disp)
+  }
+
+  expect_true("aha" %in% names(test_f(c(1, 2), mtcars)))
+})
+
+test_that("filter failure: prevent github #191 regression", {
+  flights_df = as.disk.frame(nycflights13::flights)
+
+  # the stray "" argument is not a filter condition: expect an error and a warning
+  expect_warning(expect_error(flights_df %>%
+    dt_filter(tailnum %in% paste0(unique(nycflights13::flights$tailnum)[1:60]), "") %>%
+    collect))
+
+  delete(flights_df)
+})
+
+
+teardown({
+  fs::dir_delete(file.path(tempdir(), "tmp_b_dv.df"))
+})
\ No newline at end of file
From cd5268aa828ea491df6332d472b5b97b37cbb1bb Mon Sep 17 00:00:00 2001
From: xiaodaigh
Date: Mon, 16 Mar 2020 23:09:15 +1100
Subject: [PATCH 2/2] minor

---
Review notes (not part of the commit):

* NOTE(review): the df3 case switches from dt_mutate() to mutate()
  (presumably dplyr's), so that assertion no longer goes through
  dt_mutate.disk.frame. Confirm this is intentional rather than a workaround
  for a failure with the locally defined `fn`.
* library(tidytable) now runs unconditionally in setup() although tidytable
  is only added to DESCRIPTION in the Suggests hunk; consider guarding the
  tests for machines where it is not installed.
* The context line carrying `overwrite = TRUE` matches the revised patch 1/2.

 DESCRIPTION                     | 2 +-
 tests/testthat/test-tidytable.r | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 730d6a86..17700c10 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -52,7 +52,7 @@ Suggests:
     tidytable(>= 0.3.2)
 LinkingTo:
     Rcpp
-RoxygenNote: 7.0.2
+RoxygenNote: 7.1.0
 Encoding: UTF-8
 URL: https://diskframe.com
 BugReports: https://github.com/xiaodaigh/disk.frame/issues
diff --git a/tests/testthat/test-tidytable.r b/tests/testthat/test-tidytable.r
index e6b73c5d..78ab2bd0 100644
--- a/tests/testthat/test-tidytable.r
+++ b/tests/testthat/test-tidytable.r
@@ -1,6 +1,8 @@
 context("test-tidytable-verbs")
 
 setup({
+  # attach tidytable so the dt_* verbs used by the tests below are available
+  library(tidytable)
   b = data.frame(a = 51:150, b = 1:100)
   as.disk.frame(b, file.path(tempdir(), "tmp_b_dv.df"), nchunks = 5, overwrite = TRUE)
 })
@@ -52,10 +54,6 @@ test_that("testing filter - global vars", {
 })
 
 test_that("testing mutate", {
-  library(disk.frame)
-  setup_disk.frame()
-  library(testthat)
-  library(tidytable)
   b = disk.frame(file.path(tempdir(), "tmp_b_dv.df"))
 
   df_orig = b %>%
@@ -91,7 +89,7 @@ test_that("testing mutate", {
   }
 
   df3 = value %>%
-    dt_mutate(b = fn(num, num)) %>%
+    mutate(b = fn(num, num)) %>%
     collect
 
   expect_equal(ncol(df3), 3)