From 0e1721a535a566c204a556543984defb420fc55e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 9 Nov 2025 16:26:29 +0100 Subject: [PATCH 1/9] add [[<- syntax --- NEWS.md | 2 ++ R/data.table.R | 13 +++++++++++++ inst/tests/tests.Rraw | 11 ++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4a821991b..c8ddf3774 100644 --- a/NEWS.md +++ b/NEWS.md @@ -346,6 +346,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 23. `fread()` auto-detects separators for single-column files consisting solely of quoted values (e.g. `"this_that"\n"2025-01-01 00:00:01"`), [#7366](https://github.com/Rdatatable/data.table/issues/7366). Thanks @arunsrinivasan for the report and @ben-schwen for the fix. +24. Assigning via `[[<-` now works for data.tables by reference, so forms like `DT[["col"]] = value` or `DT[[1, "col"]] = value` keep over-allocation intact instead of falling back to the data.frame method, [#6734](https://github.com/Rdatatable/data.table/issues/6734). Thanks @mb706 for the report and @ben-schwen for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/R/data.table.R b/R/data.table.R index db74384c4..0c6d0eeec 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2308,6 +2308,19 @@ tail.data.table = function(x, n=6L, ...) { set(x,j=name,value=value) # important i is missing here } +"[[<-.data.table" = function(x, i, j, value) { + if (!cedta()) { + ans = `[[<-.data.frame`(x, i, j, value) # nocov + return(setalloccol(ans)) # nocov. over-allocate (again) + } + x = copy(x) + if (nargs()<4L) { + set(x, j=i, value=value) + } else { + set(x, i, j, value) + } +} + as.data.frame.data.table = function(x, row.names = NULL, ...) 
{ ans = setDF(copy(x), rownames = row.names) # issue #5319 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e6ef9be1d..d88eef022 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1405,10 +1405,15 @@ test(437.2, truelength(DT), 2044L) DT = data.table(a=1:3,b=4:6) tl = truelength(DT) DT$foo = 7L -test(438, truelength(DT), tl) +test(438.1, truelength(DT), tl) DT[,"bar"] = 8L -test(439, truelength(DT), tl+2L) -test(440, DT, data.table(a=1:3,b=4:6,foo=7L,bar=8L)) +test(438.2, truelength(DT), tl+2L) +# also allow DT[[...]]= syntax #6734 +DT[["baz"]] = 9L +test(438.3, truelength(DT), tl+2L) +DT[[1,"qux"]] = 10L +test(438.4, truelength(DT), tl+3L) +test(438.5, DT, data.table(a=1:3,b=4:6,foo=7L,bar=8L,baz=9L,qux=c(10L,NA_integer_,NA_integer_))) # Test rbind works by colname now, for consistency with base, FR#1634 DT = data.table(a=1:3,b=4:6) From f61705fac650a2205bb4ab7e6014adec434ba450 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 9 Nov 2025 16:29:43 +0100 Subject: [PATCH 2/9] wording NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c8ddf3774..df2437cd6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -346,7 +346,7 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 23. `fread()` auto-detects separators for single-column files consisting solely of quoted values (e.g. `"this_that"\n"2025-01-01 00:00:01"`), [#7366](https://github.com/Rdatatable/data.table/issues/7366). Thanks @arunsrinivasan for the report and @ben-schwen for the fix. -24. Assigning via `[[<-` now works for data.tables by reference, so forms like `DT[["col"]] = value` or `DT[[1, "col"]] = value` keep over-allocation intact instead of falling back to the data.frame method, [#6734](https://github.com/Rdatatable/data.table/issues/6734). Thanks @mb706 for the report and @ben-schwen for the fix. +24. 
Assigning via `[[<-` now dispatches to a dedicated data.table method, so forms like `DT[["col"]] = value` or `DT[[1, "col"]] = value` keep over-allocation intact instead of falling back to the data.frame method, [#6734](https://github.com/Rdatatable/data.table/issues/6734). Thanks @mb706 for the report and @ben-schwen for the implementation. ### NOTES From 339761b2551cac0a0fe6f99b519ac0ef697513ae Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 9 Nov 2025 16:44:49 +0100 Subject: [PATCH 3/9] add to NAMESPACE --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 361b706d3..9d5ff618c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,7 +75,7 @@ export(fctr) S3method("[", data.table) S3method("[<-", data.table) # S3method("[[", data.table) -# S3method("[[<-", data.table) +S3method("[[<-", data.table) S3method("$<-", data.table) S3method(print, data.table) S3method(as.data.table, data.table) From cee6cc197bc75814d9b490008f68c34c219a02b6 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 9 Nov 2025 18:02:07 +0100 Subject: [PATCH 4/9] fix as.data.table dispatch --- R/as.data.table.R | 4 ++++ R/data.table.R | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index f849aa8c1..621fd2566 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -130,6 +130,10 @@ as.data.table.list = function(x, # e.g. cbind(foo=DF1, bar=DF2) have .named=c(TRUE,TRUE) due to the foo= and bar= and trigger "prefix." for non-vector items ...) { + if (is.data.table(x)) { + # operate on plain list to avoid [[<-.data.table dispatch + x = as.list(x) + } n = length(x) eachnrow = integer(n) # vector of lengths of each column. may not be equal if silent repetition is required. 
eachncol = integer(n) diff --git a/R/data.table.R b/R/data.table.R index 0c6d0eeec..9e6735782 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2309,12 +2309,16 @@ tail.data.table = function(x, n=6L, ...) { } "[[<-.data.table" = function(x, i, j, value) { + n = nargs() if (!cedta()) { - ans = `[[<-.data.frame`(x, i, j, value) # nocov - return(setalloccol(ans)) # nocov. over-allocate (again) + # nocov start + ans = if (n<4L) `[[<-.data.frame`(x, i, value=value) # j not supplied + else `[[<-.data.frame`(x, i, j, value) + return(setalloccol(ans)) # over-allocate (again) + # nocov end } x = copy(x) - if (nargs()<4L) { + if (n<4L) { set(x, j=i, value=value) } else { set(x, i, j, value) From dd807254662df01508e55b5fce9d380420e17be5 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 9 Nov 2025 18:53:25 +0100 Subject: [PATCH 5/9] make linter happy --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 621fd2566..e1cc5da85 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -131,7 +131,7 @@ as.data.table.list = function(x, ...) { if (is.data.table(x)) { - # operate on plain list to avoid [[<-.data.table dispatch + # operate on plain list to avoid [[<-.data.table dispatch x = as.list(x) } n = length(x) From 74dded2521ad06b95caa992a9580f7bdf5e5768b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 10 Nov 2025 10:35:12 +0100 Subject: [PATCH 6/9] add test for as.data.table.list conversion --- R/as.data.table.R | 10 ++++++---- inst/tests/tests.Rraw | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index e1cc5da85..677e72189 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -130,10 +130,6 @@ as.data.table.list = function(x, # e.g. cbind(foo=DF1, bar=DF2) have .named=c(TRUE,TRUE) due to the foo= and bar= and trigger "prefix." for non-vector items ...) 
{ - if (is.data.table(x)) { - # operate on plain list to avoid [[<-.data.table dispatch - x = as.list(x) - } n = length(x) eachnrow = integer(n) # vector of lengths of each column. may not be equal if silent repetition is required. eachncol = integer(n) @@ -145,6 +141,12 @@ as.data.table.list = function(x, rownames_ = NULL check_rownames = !isFALSE(keep.rownames) + # conversion must happen after capturing names + if (is.data.table(x)) { + # operate on plain list to avoid [[<-.data.table dispatch + x = as.list(x) + } + for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d88eef022..4af665d73 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4094,6 +4094,8 @@ test(1140, as.data.table(X), data.table(a=c(1:2,1L), b=c(1:3)), test(1141.1, data.table(a=1:2, b=1:3), data.table(a=c(1L,2L,1L), b=1:3), warning="Item 1 has 2 rows but longest item has 3; recycled") test(1141.2, data.table(a=1:2, data.table(x=1:5, y=6:10)), data.table(a=c(1L,2L,1L,2L,1L), x=1:5, y=6:10), warning="Item 1 has 2 rows but longest item has 5; recycled") test(1141.3, data.table(a=1:5, data.table(x=c(1,2), y=c(3,4))), data.table(a=c(1:5), x=c(1,2,1,2,1), y=c(3,4,3,4,3)), warning="Item 2 has 2 rows but longest item has 5; recycled") +x = structure(list(1, 2), class = c("list", "data.table")) +test(1141.4, as.data.table(x), data.table(V1=c(1), V2=c(2))) # Fix for bug #79 - DT[, foo()] returns function definition. 
DT <- data.table(a=1:2) From 621d5f59f6a5f7947e9c9c7848b2ea7c8b69b5a7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 10 Nov 2025 10:40:50 +0100 Subject: [PATCH 7/9] add tests for numeric assignment --- inst/tests/tests.Rraw | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4af665d73..86edb0f67 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1409,8 +1409,11 @@ test(438.1, truelength(DT), tl) DT[,"bar"] = 8L test(438.2, truelength(DT), tl+2L) # also allow DT[[...]]= syntax #6734 -DT[["baz"]] = 9L +DT[["baz"]] = 1L test(438.3, truelength(DT), tl+2L) +test(483.4, DT$baz, c(1L,1L,1L)) +DT[[5]] = 9L +test(438.5, truelength(DT), tl+3L) DT[[1,"qux"]] = 10L test(438.4, truelength(DT), tl+3L) test(438.5, DT, data.table(a=1:3,b=4:6,foo=7L,bar=8L,baz=9L,qux=c(10L,NA_integer_,NA_integer_))) From da7a29916c1833d0787a4dd0c7c8d2a00382cd6b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 10 Nov 2025 10:45:32 +0100 Subject: [PATCH 8/9] fix test num --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 86edb0f67..16834a2df 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1415,8 +1415,8 @@ test(483.4, DT$baz, c(1L,1L,1L)) DT[[5]] = 9L test(438.5, truelength(DT), tl+3L) DT[[1,"qux"]] = 10L -test(438.4, truelength(DT), tl+3L) -test(438.5, DT, data.table(a=1:3,b=4:6,foo=7L,bar=8L,baz=9L,qux=c(10L,NA_integer_,NA_integer_))) +test(438.6, truelength(DT), tl+3L) +test(438.7, DT, data.table(a=1:3,b=4:6,foo=7L,bar=8L,baz=9L,qux=c(10L,NA_integer_,NA_integer_))) # Test rbind works by colname now, for consistency with base, FR#1634 DT = data.table(a=1:3,b=4:6) From b0cf6d443f7fc18d87224c285af2042b97e5ff38 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 10 Nov 2025 11:17:41 +0100 Subject: [PATCH 9/9] fix testnum typo --- inst/tests/tests.Rraw | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 16834a2df..c6d622797 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1411,7 +1411,7 @@ test(438.2, truelength(DT), tl+2L) # also allow DT[[...]]= syntax #6734 DT[["baz"]] = 1L test(438.3, truelength(DT), tl+2L) -test(483.4, DT$baz, c(1L,1L,1L)) +test(438.4, DT$baz, c(1L,1L,1L)) DT[[5]] = 9L test(438.5, truelength(DT), tl+3L) DT[[1,"qux"]] = 10L