From 636222f1384011233e2ff68d235b7c9d15972ee1 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Mon, 4 Aug 2025 17:20:12 +0200 Subject: [PATCH 1/7] [RDF] Remove a test that's run twice. With a precompiled snapshot, the code that was used to test jitted vs templated is now identical, so it can be removed. --- tree/dataframe/test/dataframe_snapshot.cxx | 26 +++++----------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index be4cbfe12265d..6e81a887dc161 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -1115,31 +1115,17 @@ TEST(RDFSnapshotMore, OutOfOrderSizeBranch) t.Write(); } - auto check = [](const std::vector &sizes, const std::vector &vecs) { - EXPECT_EQ(sizes.at(0), 1); - EXPECT_EQ(sizes.at(1), 2); - EXPECT_EQ(sizes.at(2), 3); - EXPECT_TRUE(All(vecs.at(0) == ROOT::RVecF{1})); - EXPECT_TRUE(All(vecs.at(1) == ROOT::RVecF{1, 2})); - EXPECT_TRUE(All(vecs.at(2) == ROOT::RVecF{1, 2, 3})); - }; - - { - // fully typed Snapshot - auto out = ROOT::RDataFrame("t", inFile).Snapshot("t", outFile, {"vec", "sz"}); - auto sizes = out->Take("sz"); - auto vecs = out->Take("vec"); - - check(*sizes, *vecs); - } - { - // jitted Snapshot auto out = ROOT::RDataFrame("t", inFile).Snapshot("t", outFile, {"vec", "sz"}); auto sizes = out->Take("sz"); auto vecs = out->Take("vec"); - check(*sizes, *vecs); + EXPECT_EQ(sizes->at(0), 1); + EXPECT_EQ(sizes->at(1), 2); + EXPECT_EQ(sizes->at(2), 3); + EXPECT_TRUE(All(vecs->at(0) == ROOT::RVecF{1})); + EXPECT_TRUE(All(vecs->at(1) == ROOT::RVecF{1, 2})); + EXPECT_TRUE(All(vecs->at(2) == ROOT::RVecF{1, 2, 3})); } gSystem->Unlink(inFile); From 340f256775ee7eb715758b05b28eefb33b568219 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Thu, 7 Aug 2025 09:17:43 +0200 Subject: [PATCH 2/7] [RDF] Extend the test for snapshot of fixed-size arrays. - Make the array longer instead of shorter. - Fill the array with changing values. - Add a static_assert to ensure that the array size really changes. - Provide a multi-threaded version of the test. --- tree/dataframe/test/dataframe_snapshot.cxx | 73 ++++++++++++++++++---- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index 6e81a887dc161..cc4fbc613b13d 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -250,7 +250,8 @@ TEST(RDFSnapshotMore, BasketSizePreservation) // fixture that provides fixed and variable sized arrays as RDF columns class RDFSnapshotArrays : public ::testing::Test { protected: - const static unsigned int kNEvents = 10u; + static constexpr unsigned int kNEvents = 10u; + static constexpr unsigned int kFixedSize = 4u; static const std::vector kFileNames; static void SetUpTestCase() @@ -263,18 +264,17 @@ class RDFSnapshotArrays : public ::testing::Test { TTree t("arrayTree", "arrayTree"); // doubles, floats - const unsigned int fixedSize = 4u; - float fixedSizeArr[fixedSize]; - t.Branch("fixedSizeArr", fixedSizeArr, ("fixedSizeArr[" + std::to_string(fixedSize) + "]/F").c_str()); + float fixedSizeArr[kFixedSize]; + t.Branch("fixedSizeArr", fixedSizeArr, ("fixedSizeArr[" + std::to_string(kFixedSize) + "]/F").c_str()); unsigned int size = 0u; t.Branch("size", &size); double *varSizeArr = new double[eventsPerFile * 100u]; t.Branch("varSizeArr", varSizeArr, "varSizeArr[size]/D"); // bools. std::vector makes bool treatment in RDF special - bool fixedSizeBoolArr[fixedSize]; + bool fixedSizeBoolArr[kFixedSize]; t.Branch("fixedSizeBoolArr", fixedSizeBoolArr, - ("fixedSizeBoolArr[" + std::to_string(fixedSize) + "]/O").c_str()); + ("fixedSizeBoolArr[" + std::to_string(kFixedSize) + "]/O").c_str()); bool *varSizeBoolArr = new bool[eventsPerFile * 100u]; t.Branch("varSizeBoolArr", varSizeBoolArr, "varSizeBoolArr[size]/O"); @@ -509,15 +509,29 @@ TEST_F(RDFSnapshotArrays, SingleThreadJitted) TEST_F(RDFSnapshotArrays, RedefineArray) { + static constexpr unsigned int newArraySize = 6u; + static_assert(kFixedSize != newArraySize); + RDataFrame df("arrayTree", kFileNames); - auto df2 = df.Redefine("fixedSizeArr", [] { return ROOT::RVecF{42.f, 42.f}; }) - .Snapshot("t", "test_snapshotRVecRedefineArray.root", {"fixedSizeArr"}); + auto df2 = + df.Redefine("fixedSizeArr", + [](ULong64_t entry) { + return ROOT::RVecF{float(entry), entry + 1.f, entry + 2.f, entry + 3.f, entry + 4.f, entry + 5.f}; + }, + {"rdfentry_"}) + .Snapshot("t", "test_snapshotRVecRedefineArray.root", {"fixedSizeArr"}); df2->Foreach( - [](const ROOT::RVecF &v) { - EXPECT_EQ(v.size(), 2u); // not 4 as it was in the original input - EXPECT_TRUE(All(v == ROOT::RVecF{42.f, 42.f})); + [](const ROOT::RVecF &v, ULong64_t entry) { + EXPECT_EQ(v.size(), newArraySize); // not 4 as it was in the original input + EXPECT_FLOAT_EQ(v.front(), entry); + std::vector diffs(newArraySize); + std::adjacent_difference(v.begin(), v.end(), diffs.begin()); + + std::vector target(6, 1.f); + target.front() = entry; + EXPECT_EQ(diffs, target); }, - {"fixedSizeArr"}); + {"fixedSizeArr", "rdfentry_"}); gSystem->Unlink("test_snapshotRVecRedefineArray.root"); } @@ -1350,6 +1364,41 @@ TEST_F(RDFSnapshotArrays, MultiThreadJitted) ROOT::DisableImplicitMT(); } +TEST_F(RDFSnapshotArrays, RedefineArrayMT) +{ + static constexpr unsigned int newArraySize = 6u; + static_assert(kFixedSize != newArraySize); + ROOT::EnableImplicitMT(3); + + // More input files than threads, so output branches need to be bound to new inputs + std::vector fileNames(kFileNames); + std::copy(kFileNames.begin(), kFileNames.end(), std::back_inserter(fileNames)); + std::copy(kFileNames.begin(), kFileNames.end(), std::back_inserter(fileNames)); + + RDataFrame df("arrayTree", fileNames); + auto df2 = + df.Redefine("fixedSizeArr", + [](ULong64_t entry) { + return ROOT::RVecF{float(entry), entry + 1.f, entry + 2.f, entry + 3.f, entry + 4.f, entry + 5.f}; + }, + {"rdfentry_"}) + .Snapshot("t", "test_snapshotRVecRedefineArray.root", {"fixedSizeArr"}); + EXPECT_EQ(df2->Count().GetValue(), 3 * kNEvents); + df2->Foreach( + [](const ROOT::RVecF &v) { + EXPECT_EQ(v.size(), newArraySize); // not 4 as it was in the original input + EXPECT_LT(v.front(), 3 * kNEvents); + + std::vector target(newArraySize, 0.f); + for (unsigned int i = 0; i < newArraySize; ++i) + EXPECT_FLOAT_EQ(v[i], v.front() + i); + }, + {"fixedSizeArr"}); + + ROOT::DisableImplicitMT(); + gSystem->Unlink("test_snapshotRVecRedefineArray.root"); +} + // See also https://github.com/root-project/root/issues/10225 TEST_F(RDFSnapshotArrays, WriteRVecFromFile) { From a74182de4b024bcf127c31c03027b0ac1bccd35b Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Tue, 5 Aug 2025 15:12:01 +0200 Subject: [PATCH 3/7] [RDF] Make Snapshot test more readable, add a test for collection size. Instead of testing in a for loop, (which possibly prints 10k failures), use std::all_of to make a test output more readable. Furthermore, test the size of the 3rd collection, which maybe just got forgotten. --- tree/dataframe/test/dataframe_snapshot.cxx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index cc4fbc613b13d..7b03720b27922 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -755,12 +755,13 @@ void ReadWriteCarray(const char *outFileNameBase) EXPECT_TRUE(r.Next()); EXPECT_EQ(rv.GetSize(), 100000u); EXPECT_EQ(rvb.GetSize(), 100000u); - for (auto e : rv) - EXPECT_EQ(e, 84); - for (auto e : rvb) - EXPECT_TRUE(e); - for (auto e : rvl) - EXPECT_EQ(e, 42); + EXPECT_EQ(rvl.GetSize(), 100000u); + EXPECT_TRUE(std::all_of(rv.begin(), rv.end(), [](auto elm) { return elm == 84; })) + << rv[0] << " " << rv[1] << " " << rv[2]; + EXPECT_TRUE(std::all_of(rvb.begin(), rvb.end(), [](auto elm) { return elm; })) + << rvb[0] << " " << rvb[1] << " " << rvb[2]; + EXPECT_TRUE(std::all_of(rvl.begin(), rvl.end(), [](auto elm) { return elm == 42; })) + << rvl[0] << " " << rvl[1] << " " << rvl[2]; // Size 3 EXPECT_TRUE(r.Next()); From b9a777f02fc18c4b09789e7c748d5bbc5ab7b63b Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Thu, 7 Aug 2025 11:26:05 +0200 Subject: [PATCH 4/7] [RDF] Use a RAII to enable IMT as suggested in review. --- tree/dataframe/test/dataframe_snapshot.cxx | 43 +++++++--------------- 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index 7b03720b27922..0a2c2310f77fd 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -1292,7 +1292,7 @@ TEST_F(RDFSnapshotMT, Reshuffled_friends) TEST(RDFSnapshotMore, ManyTasksPerThread) { const auto nSlots = 4u; - ROOT::EnableImplicitMT(nSlots); + TIMTEnabler imt(nSlots); // make sure the file is not here beforehand gSystem->Unlink("snapshot_manytasks_out.root"); @@ -1322,8 +1322,6 @@ TEST(RDFSnapshotMore, ManyTasksPerThread) for (auto i = 0u; i < nInputFiles; ++i) gSystem->Unlink((inputFilePrefix + std::to_string(i) + ".root").c_str()); gSystem->Unlink(outputFile); - - ROOT::DisableImplicitMT(); } void checkSnapshotArrayFileMT(RResultPtr> &df, unsigned int kNEvents) @@ -1341,35 +1339,31 @@ void checkSnapshotArrayFileMT(RResultPtr> &df, unsigned TEST_F(RDFSnapshotArrays, MultiThread) { - ROOT::EnableImplicitMT(4); + TIMTEnabler imt(4); RDataFrame tdf("arrayTree", kFileNames); auto dt = tdf.Snapshot("outTree", "test_snapshotRVecoutMT.root", {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); checkSnapshotArrayFileMT(dt, kNEvents); - - ROOT::DisableImplicitMT(); } TEST_F(RDFSnapshotArrays, MultiThreadJitted) { - ROOT::EnableImplicitMT(4); + TIMTEnabler imt(4); RDataFrame tdf("arrayTree", kFileNames); auto dj = tdf.Snapshot("outTree", "test_snapshotRVecoutMTJitted.root", {"fixedSizeArr", "size", "varSizeArr", "varSizeBoolArr", "fixedSizeBoolArr"}); checkSnapshotArrayFileMT(dj, kNEvents); - - ROOT::DisableImplicitMT(); } TEST_F(RDFSnapshotArrays, RedefineArrayMT) { static constexpr unsigned int newArraySize = 6u; static_assert(kFixedSize != newArraySize); - ROOT::EnableImplicitMT(3); + TIMTEnabler(3); // More input files than threads, so output branches need to be bound to new inputs std::vector fileNames(kFileNames); @@ -1396,7 +1390,6 @@ TEST_F(RDFSnapshotArrays, RedefineArrayMT) }, {"fixedSizeArr"}); - ROOT::DisableImplicitMT(); gSystem->Unlink("test_snapshotRVecRedefineArray.root"); } @@ -1431,7 +1424,7 @@ TEST(RDFSnapshotMore, ColsWithCustomTitlesMT) WriteColsWithCustomTitles(tname, fname); // read and write test tree with RDF (in parallel) - ROOT::EnableImplicitMT(4); + TIMTEnabler imt(4); RDataFrame d(tname, fname); const std::string prefix = "snapshotted_"; auto res_tdf = d.Snapshot(tname, prefix + fname, {"i", "float", "arrint", "vararrint"}); @@ -1443,7 +1436,6 @@ TEST(RDFSnapshotMore, ColsWithCustomTitlesMT) // clean-up gSystem->Unlink(fname); gSystem->Unlink((prefix + fname).c_str()); - ROOT::DisableImplicitMT(); } TEST(RDFSnapshotMore, TreeWithFriendsMT) @@ -1453,7 +1445,7 @@ TEST(RDFSnapshotMore, TreeWithFriendsMT) RDataFrame(10).Define("x", []() { return 42; }).Snapshot("t", fname1, {"x"}); RDataFrame(10).Define("x", []() { return 0; }).Snapshot("t", fname2, {"x"}); - ROOT::EnableImplicitMT(); + TIMTEnabler imt(0); TFile file(fname1); auto tree = file.Get("t"); @@ -1467,7 +1459,6 @@ TEST(RDFSnapshotMore, TreeWithFriendsMT) EXPECT_EQ(df_out->Max("x").GetValue(), 42); EXPECT_EQ(df_out->GetColumnNames(), std::vector{"x"}); - ROOT::DisableImplicitMT(); gSystem->Unlink(fname1); gSystem->Unlink(fname2); gSystem->Unlink(outfname); @@ -1494,13 +1485,12 @@ TEST(RDFSnapshotMore, JittedSnapshotAndAliasedColumns) TEST(RDFSnapshotMore, LazyNotTriggeredMT) { - ROOT::EnableImplicitMT(4); + TIMTEnabler imt(4); ROOT_EXPECT_WARNING(BookLazySnapshot(), "Snapshot", "A lazy Snapshot action was booked but never triggered. The tree 't' in output file " "'lazysnapshotnottriggered_shouldnotbecreated.root' was not created. " "In case it was desired instead, remember to trigger the Snapshot operation, by " "storing its result in a variable and for example calling the GetValue() method on it."); - ROOT::DisableImplicitMT(); } TEST(RDFSnapshotMore, LazyTriggeredMT) @@ -1517,7 +1507,7 @@ TEST(RDFSnapshotMore, EmptyBuffersMT) const auto fname = "emptybuffersmt.root"; const auto treename = "t"; const unsigned int nslots = std::min(4U, std::thread::hardware_concurrency()); - ROOT::EnableImplicitMT(nslots); + TIMTEnabler imt(nslots); ROOT::RDataFrame d(10); std::atomic_bool firstWorker{true}; auto dd = d.DefineSlot("x", [&](unsigned int) { @@ -1540,15 +1530,13 @@ TEST(RDFSnapshotMore, EmptyBuffersMT) EXPECT_EQ(t->GetListOfBranches()->GetEntries(), 1); EXPECT_EQ(t->GetEntries(), Long64_t(passed)); - ROOT::DisableImplicitMT(); gSystem->Unlink(fname); } TEST(RDFSnapshotMore, ReadWriteCarrayMT) { - ROOT::EnableImplicitMT(4); + TIMTEnabler imt(4); ReadWriteCarray("ReadWriteCarrayMT"); - ROOT::DisableImplicitMT(); } TEST(RDFSnapshotMore, TClonesArrayMT) @@ -1618,12 +1606,10 @@ TEST(RDFSnapshotMore, SetMaxTreeSizeMT) // Create an RDF from the previously snapshotted file, then Snapshot again // with IMT enabled. { - ROOT::EnableImplicitMT(); + TIMTEnabler imt(0); ROOT::RDataFrame df{"T", "rdfsnapshot_ttree_sequential_setmaxtreesize.root"}; df.Snapshot("T", "rdfsnapshot_imt_setmaxtreesize.root", {"x"}); - - ROOT::DisableImplicitMT(); } // Check the file for data integrity. @@ -1668,25 +1654,22 @@ TEST(RDFSnapshotMore, ZeroOutputEntriesMT) TEST(RDFSnapshotMore, CustomBasketSizeMT) { - ROOT::EnableImplicitMT(); + TIMTEnabler imt(0); TestCustomBasketSize(); - ROOT::DisableImplicitMT(); } // Test for default basket size TEST(RDFSnapshotMore, DefaultBasketSizeMT) { - ROOT::EnableImplicitMT(); + TIMTEnabler imt(0); TestDefaultBasketSize(); - ROOT::DisableImplicitMT(); } // Test for basket size preservation TEST(RDFSnapshotMore, BasketSizePreservationMT) { - ROOT::EnableImplicitMT(); + TIMTEnabler imt(0); TestBasketSizePreservation(); - ROOT::DisableImplicitMT(); } #endif // R__USE_IMT From 155dcdb556849a0edfd10a1b01bb156eabde3c02 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Thu, 7 Aug 2025 10:51:27 +0200 Subject: [PATCH 5/7] [RDF] Don't search for input branches when snapshotting Redefine. When columns got redefined, snapshot was trying to still find input columns for the outputs. That should not be the case since none of the properties of the input branch need to be transferred. --- tree/dataframe/src/RDFSnapshotHelpers.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tree/dataframe/src/RDFSnapshotHelpers.cxx b/tree/dataframe/src/RDFSnapshotHelpers.cxx index 39736b29b5bd0..49daa4e4321d8 100644 --- a/tree/dataframe/src/RDFSnapshotHelpers.cxx +++ b/tree/dataframe/src/RDFSnapshotHelpers.cxx @@ -244,7 +244,7 @@ void SetBranchesHelper(TTree *inputTree, TTree &outputTree, ROOT::Internal::RDF: void *&actionHelperBranchPtrAddress, bool isDefine) { - auto *inputBranch = SearchForBranch(inputTree, inputBranchName); + auto *inputBranch = isDefine ? nullptr : SearchForBranch(inputTree, inputBranchName); // Respect the original bufsize and splitlevel arguments // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid From d0df45dc8ede281105c71b9e7cabbcede63450a4 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Thu, 7 Aug 2025 11:54:57 +0200 Subject: [PATCH 6/7] [RDF][NFC] Use a static inline to simplify a test fixture. --- tree/dataframe/test/dataframe_snapshot.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index 0a2c2310f77fd..94c53eac9e9fb 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -252,7 +252,7 @@ class RDFSnapshotArrays : public ::testing::Test { protected: static constexpr unsigned int kNEvents = 10u; static constexpr unsigned int kFixedSize = 4u; - static const std::vector kFileNames; + static inline const std::vector kFileNames = {"test_snapshotarray1.root", "test_snapshotarray2.root"}; static void SetUpTestCase() { @@ -305,7 +305,6 @@ class RDFSnapshotArrays : public ::testing::Test { gSystem->Unlink(fname.c_str()); } }; -const std::vector RDFSnapshotArrays::kFileNames = {"test_snapshotarray1.root", "test_snapshotarray2.root"}; /********* SINGLE THREAD TESTS ***********/ From 1a82395deb6bd57ec02bb95f8d074e4c88adf887 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Thu, 7 Aug 2025 11:55:40 +0200 Subject: [PATCH 7/7] [RDF] Add test carrying a Define across an input file boundary. The code for updating branch addresses of TBranchElement instances was triggered only in the context of a Redefine. Here, a test with a Define is added to increase the coverage in snapshot with multiple input files. --- tree/dataframe/test/dataframe_snapshot.cxx | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tree/dataframe/test/dataframe_snapshot.cxx b/tree/dataframe/test/dataframe_snapshot.cxx index 94c53eac9e9fb..2230916a4dda0 100644 --- a/tree/dataframe/test/dataframe_snapshot.cxx +++ b/tree/dataframe/test/dataframe_snapshot.cxx @@ -506,6 +506,31 @@ TEST_F(RDFSnapshotArrays, SingleThreadJitted) checkSnapshotArrayFile(dj, kNEvents); } +// Run a define across a TTree boundary to trigger an update of branch addresses +TEST_F(RDFSnapshotArrays, AddDefine) +{ + RDataFrame df("arrayTree", kFileNames); + auto df2 = df.Define("RVecColumn", + [](ULong64_t entry) { + return ROOT::RVec{entry, entry + 1, entry + 2, entry + 3, entry + 4, entry + 5}; + }, + {"rdfentry_"}) + .Snapshot("t", "test_snapshotDefineRVec.root", {"RVecColumn"}); + df2->Foreach( + [](const ROOT::RVec &v, ULong64_t entry) { + EXPECT_EQ(v.size(), 6); + EXPECT_FLOAT_EQ(v.front(), entry); + + std::vector column(v.begin(), v.end()); + std::vector target(6); + std::generate(target.begin(), target.end(), [i = entry]() mutable { return i++; }); + EXPECT_EQ(column, target); + }, + {"RVecColumn", "rdfentry_"}); + + gSystem->Unlink("test_snapshotDefineRVec.root.root"); +} + TEST_F(RDFSnapshotArrays, RedefineArray) { static constexpr unsigned int newArraySize = 6u;