From 028c53d343792437482a8122102cb64f98c600ac Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 10 Mar 2023 12:14:02 -0500 Subject: [PATCH 01/21] cleanup [skip ci] --- src/TiledArray/math/solvers/cp.h | 2 +- src/TiledArray/tensor/tensor.h | 3 ++- src/TiledArray/tile_interface/scale.h | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/math/solvers/cp.h b/src/TiledArray/math/solvers/cp.h index d21df2d46d..f94ea259ff 100644 --- a/src/TiledArray/math/solvers/cp.h +++ b/src/TiledArray/math/solvers/cp.h @@ -35,4 +35,4 @@ using TiledArray::math::cp::CP_ALS; using TiledArray::math::cp::cp_reconstruct; } // namespace TiledArray -#endif // TILEDARRAY_MATH_SOLVERS_DIIS_H__INCLUDED +#endif // TILEDARRAY_MATH_SOLVERS_CP_H__INCLUDED diff --git a/src/TiledArray/tensor/tensor.h b/src/TiledArray/tensor/tensor.h index d3c91361f5..1f09b92701 100644 --- a/src/TiledArray/tensor/tensor.h +++ b/src/TiledArray/tensor/tensor.h @@ -872,8 +872,9 @@ class Tensor { const_reference operator()(const Index&... i) const { TA_ASSERT(!this->empty()); TA_ASSERT(this->batch_size() == 1); + using Int = std::common_type_t; const auto iord = this->range_.ordinal( - std::array, sizeof...(Index)>{{i...}}); + std::array{{static_cast(i)...}}); TA_ASSERT(this->range_.includes_ordinal(iord)); return this->data()[iord]; } diff --git a/src/TiledArray/tile_interface/scale.h b/src/TiledArray/tile_interface/scale.h index 8d548114b9..cbfa48c972 100644 --- a/src/TiledArray/tile_interface/scale.h +++ b/src/TiledArray/tile_interface/scale.h @@ -38,9 +38,13 @@ namespace TiledArray { /// \tparam Scalar A numeric type (i.e. TiledArray::detail::is_numeric_v /// is true) \param arg The left-hand argument to be scaled \param factor The /// scaling factor \return A tile that is equal to arg * factor -template && - !TiledArray::detail::is_array_v>* = nullptr> +template < + typename Arg, typename Scalar, + std::enable_if_t< + TiledArray::detail::is_numeric_v && + !TiledArray::detail::is_array_v && + std::is_void_v().scale( + std::declval()))>>>* = nullptr> inline auto scale(const Arg& arg, const Scalar factor) { return arg.scale(factor); } @@ -55,8 +59,11 @@ inline auto scale(const Arg& arg, const Scalar factor) { /// \return A tile that is equal to perm ^ (arg * factor) template < typename Arg, typename Scalar, typename Perm, - std::enable_if_t && - TiledArray::detail::is_permutation_v>* = nullptr> + std::enable_if_t< + TiledArray::detail::is_numeric_v && + TiledArray::detail::is_permutation_v && + std::is_void_v().scale( + std::declval(), std::declval()))>>>* = nullptr> inline auto scale(const Arg& arg, const Scalar factor, const Perm& perm) { return arg.scale(factor, perm); } From e2fd8bd9d058bedf7884931f7e6224e57c9043d6 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 22 Aug 2023 15:45:26 -0400 Subject: [PATCH 02/21] bump btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index 676ff2ee4f..8f18337801 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -24,7 +24,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 29a2bf3d3c2670c608b7bfdf2299d76fbc20e041) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 6fcb6451bc7ca46a00534a30c51dc5c230c39ac3) +set(TA_TRACKED_BTAS_TAG 4a1304cea6677255dc6f70705469b8c387713ccc) set(TA_TRACKED_BTAS_PREVIOUS_TAG 474ddc095cbea12a1d28aca5435703dd9f69b166) set(TA_TRACKED_LIBRETT_TAG 68abe31a9ec6fd2fd9ffbcd874daa80457f947da) From bc96de88b93eb204d5bea11e0f7620406852701a Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 18 Dec 2024 14:43:11 -0500 Subject: [PATCH 03/21] Revert btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index 9007dde279..d3a5494afc 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,7 +16,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 4cd283245027f19a7ea6c18f5a7195361dd89900) +set(TA_TRACKED_BTAS_TAG 6433b71c8c9ff4b14cbe80dc87fa4f50314c5cfc) set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) From 34fe780b30150c1e87c9a494d519d9ad3651b953 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 18 Dec 2024 15:34:45 -0500 Subject: [PATCH 04/21] Bump btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index d3a5494afc..bea5b653e4 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,7 +16,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 6433b71c8c9ff4b14cbe80dc87fa4f50314c5cfc) +set(TA_TRACKED_BTAS_TAG 22bf066f6860798a56326b54fa56fab1a2e52693) set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) From 65f75f1bae36633dc79dd825cef2e21d9a72a39d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 19 Dec 2024 14:28:39 -0500 Subject: [PATCH 05/21] bump btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index bea5b653e4..f921935e3a 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,7 +16,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 22bf066f6860798a56326b54fa56fab1a2e52693) +set(TA_TRACKED_BTAS_TAG 8a41dc962035dc2e818d179e9e0b779bcdd6d963) set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) From a52b3697d930d778a4c827025548ee2a759d14af Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 23 Dec 2024 20:38:24 -0500 Subject: [PATCH 06/21] Make a new CP ALS which takes the THC format --- src/TiledArray/math/solvers/cp.h | 2 + src/TiledArray/math/solvers/cp/cp.h | 17 +- src/TiledArray/math/solvers/cp/cp_als.h | 1 + src/TiledArray/math/solvers/cp/cp_thc_als.h | 315 ++++++++++++++++++++ 4 files changed, 327 insertions(+), 8 deletions(-) create mode 100644 src/TiledArray/math/solvers/cp/cp_thc_als.h diff --git a/src/TiledArray/math/solvers/cp.h b/src/TiledArray/math/solvers/cp.h index f94ea259ff..b957d33539 100644 --- a/src/TiledArray/math/solvers/cp.h +++ b/src/TiledArray/math/solvers/cp.h @@ -28,10 +28,12 @@ #include #include +#include #include namespace TiledArray { using TiledArray::math::cp::CP_ALS; +using TiledArray::math::cp::CP_THC_ALS; using TiledArray::math::cp::cp_reconstruct; } // namespace TiledArray diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index 9065776211..dcde333055 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -91,7 +91,7 @@ class CP { /// \returns the fit: \f$ 1.0 - |T_{\text{exact}} - T_{\text{approx}} | \f$ double compute_rank(size_t rank, size_t rank_block_size = 0, bool build_rank = false, double epsilonALS = 1e-3, - bool verbose = false) { + bool verbose = false, int niters = 100) { rank_block_size = (rank_block_size == 0 ? rank : rank_block_size); double epsilon = 1.0; fit_tol = epsilonALS; @@ -101,13 +101,13 @@ class CP { do { rank_trange = TiledRange1::make_uniform(cur_rank, rank_block_size); build_guess(cur_rank, rank_trange); - ALS(cur_rank, 100, verbose); + ALS(cur_rank, niters, verbose); ++cur_rank; } while (cur_rank < rank); } else { rank_trange = TiledRange1::make_uniform(rank, rank_block_size); build_guess(rank, rank_trange); - ALS(rank, 100, verbose); + ALS(rank, niters, verbose); } return epsilon; } @@ -185,7 +185,8 @@ class CP { final_fit, // The final fit of the ALS // optimization at fixed rank. fit_tol, // Tolerance for the ALS solver - norm_reference; // used in determining the CP fit. + norm_reference, // used in determining the CP fit. + norm_ref_sq; std::size_t converged_num = 0; // How many times the ALS solver // has changed less than the tolerance in a row @@ -370,16 +371,16 @@ class CP { for (size_t i = 1; i < ndim - 1; ++i, ++gram_ptr) { W("r,rp") *= (*gram_ptr)("r,rp"); } - auto result = sqrt(W("r,rp").dot( - (unNormalized_Factor("r,n") * unNormalized_Factor("rp,n")))); + auto result = W("r,rp").dot( + (unNormalized_Factor("r,n") * unNormalized_Factor("rp,n"))); // not sure why need to fence here, but hang periodically without it W.world().gop.fence(); return result; }; // compute the error in the loss function and find the fit const auto norm_cp = factor_norm(); // ||T_CP||_2 - const auto squared_norm_error = norm_reference * norm_reference + - norm_cp * norm_cp - + const auto squared_norm_error = norm_ref_sq + + norm_cp - 2.0 * ref_dot_cp; // ||T - T_CP||_2^2 // N.B. squared_norm_error is very noisy // TA_ASSERT(squared_norm_error >= - 1e-8); diff --git a/src/TiledArray/math/solvers/cp/cp_als.h b/src/TiledArray/math/solvers/cp/cp_als.h index bd5c5b9eee..4f44fa4aa1 100644 --- a/src/TiledArray/math/solvers/cp/cp_als.h +++ b/src/TiledArray/math/solvers/cp/cp_als.h @@ -69,6 +69,7 @@ class CP_ALS : public CP { first_gemm_dim_last.pop_back(); this->norm_reference = norm2(tref); + this->norm_ref_sq = this->norm_reference * this->norm_reference; } protected: diff --git a/src/TiledArray/math/solvers/cp/cp_thc_als.h b/src/TiledArray/math/solvers/cp/cp_thc_als.h new file mode 100644 index 0000000000..8aa919aa3d --- /dev/null +++ b/src/TiledArray/math/solvers/cp/cp_thc_als.h @@ -0,0 +1,315 @@ +/* +* This file is a part of TiledArray. +* Copyright (C) 2023 Virginia Tech +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* +* Karl Pierce +* Department of Chemistry, Virginia Tech +* +* cp.h +* April 17, 2022 +* +*/ + +#ifndef TILEDARRAY_MATH_SOLVERS_CP_CP_THC_ALS__H +#define TILEDARRAY_MATH_SOLVERS_CP_CP_THC_ALS__H + +#include +#include +#include + +namespace TiledArray::math::cp { + +/** +* This is a canonical polyadic (CP) optimization class which +* takes a reference order-N tensor that is expressed in the THC format +* and decomposes it into a set of order-2 tensors all coupled by +* a hyperdimension called the rank. These factors are optimized +* using an alternating least squares algorithm. +* +* @tparam Tile typing for the DistArray tiles +* @tparam Policy policy of the DistArray +**/ +template +class CP_THC_ALS : public CP { +public: + using CP::ndim; + using CP::cp_factors; + + /// Default CP_ALS constructor + CP_THC_ALS() = default; + + /// CP_ALS constructor function + /// takes, as a constant reference, the tensor to be decomposed + /// \param[in] tref A constant reference to the tensor to be decomposed. + // for now I am going to assume an order-4 THC but later this will be used for + // arbitrary order. + CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& tref3) + : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(tref1.world()) { + + DistArray pr, pq; + pr("r,rp") = (ref_orb_a("a,r") * ref_orb_a("a,rp")) * (ref_orb_b("i,r") * ref_orb_b("i,rp")); + pq("p,q") = ref_core("p,r") * pr("r,rp") * ref_core("q,rp"); + this->norm_ref_sq = pq("r,rp").dot(pr("r,rp")).get(); + this->norm_reference = sqrt(this->norm_ref_sq); + } + +protected: + const DistArray& ref_orb_a, ref_orb_b, ref_core; + madness::World& world; + std::vector lambda; + std::vector> THC_times_CPD; + TiledRange1 rank_trange1; + size_t size_of_core; + + /// This function constructs the initial CP factor matrices + /// stores them in CP::cp_factors vector. + /// In general the initial guess is constructed using quasi-random numbers + /// generated between [-1, 1] + /// \param[in] rank rank of the CP approximation + /// \param[in] rank_trange TiledRange1 of the rank dimension. + void build_guess(const size_t rank, const TiledRange1 rank_trange) override { + rank_trange1 = rank_trange; + if (cp_factors.size() == 0) { + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_a.trange().elements_range().extent(0), + rank_trange, ref_orb_a.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_b.trange().elements_range().extent(0), + rank_trange, ref_orb_b.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_a.trange().elements_range().extent(0), + rank_trange, ref_orb_a.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_b.trange().elements_range().extent(0), + rank_trange, ref_orb_b.trange().data()[0])); + } else { + TA_EXCEPTION("Currently no implementation to increase or change rank"); + } + + return; + } + + /// This function is specified by the CP solver + /// optimizes the rank @c rank CP approximation + /// stored in cp_factors. + /// \param[in] rank rank of the CP approximation + /// \param[in] max_iter max number of ALS iterations + /// \param[in] verbose Should ALS print fit information while running? + void ALS(size_t rank, size_t max_iter, bool verbose = false) override { + size_t iter = 0; + bool converged = false; + auto nthc = TA::rank(ref_core); + // initialize partial grammians + { + auto ptr = this->partial_grammian.begin(); + for (auto& i : cp_factors) { + (*ptr)("r,rp") = i("r,n") * i("rp, n"); + ++ptr; + } + DistArray pq; + pq("p,q") = ref_orb_a("a,p") * cp_factors[2]("q,a"); + THC_times_CPD.emplace_back(pq); + pq("p,q") *= ref_orb_b("b,p") * cp_factors[3]("q,b"); + pq.truncate(); + THC_times_CPD.emplace_back(pq); + + } +// auto factor_begin = cp_factors.data(), +// gram_begin = this->partial_grammian.data(); +// DistArray abr, tref; +// abr = einsum(ref_orb_a("a,r"), ref_orb_b("b,r"), "a,b,r"); +// tref("a,b,c,d") = abr("a,b,r") * ref_core("r,rp") * abr("c,d,rp"); +// +// std::cout << "Norm2 : " << norm2(tref) << std::endl; +// std::cout << "set: " << this->norm_reference << std::endl; + do { + update_factors_left(); + update_factors_right(); + converged = this->check_fit(verbose); + // for (auto i = 0; i < nthc; ++i) { + // update_factor(i, rank); + // } + ++iter; + } while (iter < max_iter && !converged); + } + + void update_factor(size_t mode, size_t rank){ + + size_t pos = 2 * mode, pos_plus_one = pos + 1; + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the core + // if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + size_t other_side = (mode + 1) % 2; + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("p,r") * THC_times_CPD[other_side]("r,q"); + + pq("p,q") = ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + DistArray W; + other_side *= 2; + W_env("p,q") = this->partial_grammian[other_side]("p,q") * + this->partial_grammian[other_side + 1]("p,q"); + W("p,q") = this->partial_grammian[pos_plus_one]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[pos] = An; + auto& gram = this->partial_grammian[pos]; + gram("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_a("a,p") * cp_factors[pos]("q,a"); + THC_times_CPD[mode] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = einsum(pq("p,q"), env("p,q"), "p,q")("p,q") * ref_orb_b("b,p"); + + this->MTtKRP = Bn; + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[pos]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->unNormalized_Factor = Bn.clone(); + this->normalize_factor(Bn); + cp_factors[pos_plus_one] = Bn; + auto& gram = this->partial_grammian[pos_plus_one]; + gram("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[mode]("p,q") *= ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); + } + } + void update_factors_left(){ + + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the core + // if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("p,r") * THC_times_CPD[1]("r,q"); + + pq("p,q") = ref_orb_b("b,p") * cp_factors[1]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W_env("p,q") = this->partial_grammian[2]("p,q") * + this->partial_grammian[3]("p,q"); + W("p,q") = this->partial_grammian[1]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[0] = An; + this->partial_grammian[0]("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_a("a,p") * An("q,a"); + THC_times_CPD[0] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_b("b,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[0]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(Bn); + cp_factors[1] = Bn; + this->partial_grammian[1]("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[0]("p,q") *= ref_orb_b("b,p") * Bn("q,b"); + } + } + void update_factors_right(){ + + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the core + // if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("r,p") * THC_times_CPD[0]("r,q"); + + pq("p,q") = ref_orb_b("b,p") * cp_factors[3]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W_env("p,q") = this->partial_grammian[0]("p,q") * + this->partial_grammian[1]("p,q"); + W("p,q") = this->partial_grammian[3]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[2] = An; + this->partial_grammian[2]("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_a("a,p") * An("q,a"); + THC_times_CPD[1] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_b("b,p"); + + this->MTtKRP = Bn; + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[2]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->unNormalized_Factor = Bn.clone(); + this->normalize_factor(Bn); + cp_factors[3] = Bn; + this->partial_grammian[3]("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[1]("p,q") *= ref_orb_b("b,p") * Bn("q,b"); + } + } +}; + +} // namespace TiledArray::math::cp + +#endif // TILEDARRAY_MATH_SOLVERS_CP_CP_THC_ALS__H From 05653289ac84107db9872e5511639ec00f33aee8 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Sun, 12 Jan 2025 21:57:32 -0500 Subject: [PATCH 07/21] Bump btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index f921935e3a..09058797dd 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -16,7 +16,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 8a41dc962035dc2e818d179e9e0b779bcdd6d963) +set(TA_TRACKED_BTAS_TAG 26646416e5f5829dc13d0d97fb15ae5c01b78e82) set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) From 6ac5e3585e57e0444b97a0dcefa69ee52f106334 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 23 Jan 2025 13:58:55 -0500 Subject: [PATCH 08/21] Create a way to set the CP factor matrices of thc based CP --- src/TiledArray/math/solvers/cp/cp_thc_als.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/math/solvers/cp/cp_thc_als.h b/src/TiledArray/math/solvers/cp/cp_thc_als.h index 8aa919aa3d..30920130f3 100644 --- a/src/TiledArray/math/solvers/cp/cp_thc_als.h +++ b/src/TiledArray/math/solvers/cp/cp_thc_als.h @@ -66,6 +66,13 @@ class CP_THC_ALS : public CP { this->norm_reference = sqrt(this->norm_ref_sq); } + // This will just assume you've done the correct thing. If you haven't + // an error will be thrown somewhere else. + void set_factor_matrices(std::vector> & factors){ + cp_factors = factors; + factors_set = true; + } + protected: const DistArray& ref_orb_a, ref_orb_b, ref_core; madness::World& world; @@ -73,6 +80,7 @@ class CP_THC_ALS : public CP { std::vector> THC_times_CPD; TiledRange1 rank_trange1; size_t size_of_core; + bool factors_set = false; /// This function constructs the initial CP factor matrices /// stores them in CP::cp_factors vector. @@ -95,7 +103,9 @@ class CP_THC_ALS : public CP { cp_factors.emplace_back(this->construct_random_factor( world, rank, ref_orb_b.trange().elements_range().extent(0), rank_trange, ref_orb_b.trange().data()[0])); - } else { + } else if(factors_set) { + // Do nothing and don't throw an error. + }else { TA_EXCEPTION("Currently no implementation to increase or change rank"); } From 2bc3d90af1033f7173522d62df5ce796f6926660 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Sun, 23 Feb 2025 14:03:38 -0500 Subject: [PATCH 09/21] Update THC based solver to work with unsymmetric tensors --- src/TiledArray/math/solvers/cp/cp_thc_als.h | 42 ++++++++++++++------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/TiledArray/math/solvers/cp/cp_thc_als.h b/src/TiledArray/math/solvers/cp/cp_thc_als.h index 30920130f3..106a1c79e9 100644 --- a/src/TiledArray/math/solvers/cp/cp_thc_als.h +++ b/src/TiledArray/math/solvers/cp/cp_thc_als.h @@ -57,13 +57,28 @@ class CP_THC_ALS : public CP { // for now I am going to assume an order-4 THC but later this will be used for // arbitrary order. CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& tref3) - : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(tref1.world()) { + : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(tref1.world()), + ref_orb_c(tref1), ref_orb_d(tref2) { DistArray pr, pq; pr("r,rp") = (ref_orb_a("a,r") * ref_orb_a("a,rp")) * (ref_orb_b("i,r") * ref_orb_b("i,rp")); pq("p,q") = ref_core("p,r") * pr("r,rp") * ref_core("q,rp"); this->norm_ref_sq = pq("r,rp").dot(pr("r,rp")).get(); this->norm_reference = sqrt(this->norm_ref_sq); + symmetric = true; + } + + CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& core, + const DistArray& tref3, const DistArray& tref4) + : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(core), world(tref1.world()), + ref_orb_c(tref3), ref_orb_d(tref4) { + DistArray pr, pq; + // I need two things that are r_{ai} x r_{ab} which I am calling p x q + pr("p,r") = ((ref_orb_a("a,p") * ref_orb_a("a,r")) * (ref_orb_b("i,p") * ref_orb_b("i,r"))); + pq("p,q") = pr("p,r") * ref_core("r,q"); + pr("p,q") = ref_core("p,r") * ((ref_orb_c("b,r") * ref_orb_c("b,q")) * (ref_orb_d("j,r") * ref_orb_d("j,q"))); + this->norm_ref_sq = pq("p,q").dot(pr("p,q")).get(); + this->norm_reference = sqrt(this->norm_ref_sq); } // This will just assume you've done the correct thing. If you haven't @@ -74,13 +89,14 @@ class CP_THC_ALS : public CP { } protected: - const DistArray& ref_orb_a, ref_orb_b, ref_core; + const DistArray& ref_orb_a, ref_orb_b, ref_core, ref_orb_c, ref_orb_d; madness::World& world; std::vector lambda; std::vector> THC_times_CPD; TiledRange1 rank_trange1; size_t size_of_core; bool factors_set = false; + bool symmetric = false; /// This function constructs the initial CP factor matrices /// stores them in CP::cp_factors vector. @@ -98,11 +114,11 @@ class CP_THC_ALS : public CP { world, rank, ref_orb_b.trange().elements_range().extent(0), rank_trange, ref_orb_b.trange().data()[0])); cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_a.trange().elements_range().extent(0), - rank_trange, ref_orb_a.trange().data()[0])); + world, rank, ref_orb_c.trange().elements_range().extent(0), + rank_trange, ref_orb_c.trange().data()[0])); cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_b.trange().elements_range().extent(0), - rank_trange, ref_orb_b.trange().data()[0])); + world, rank, ref_orb_d.trange().elements_range().extent(0), + rank_trange, ref_orb_d.trange().data()[0])); } else if(factors_set) { // Do nothing and don't throw an error. }else { @@ -130,9 +146,9 @@ class CP_THC_ALS : public CP { ++ptr; } DistArray pq; - pq("p,q") = ref_orb_a("a,p") * cp_factors[2]("q,a"); + pq("p,q") = ref_orb_c("a,p") * cp_factors[2]("q,a"); THC_times_CPD.emplace_back(pq); - pq("p,q") *= ref_orb_b("b,p") * cp_factors[3]("q,b"); + pq("p,q") *= ref_orb_d("b,p") * cp_factors[3]("q,b"); pq.truncate(); THC_times_CPD.emplace_back(pq); @@ -277,8 +293,8 @@ class CP_THC_ALS : public CP { DistArray An; env("p,q") = ref_core("r,p") * THC_times_CPD[0]("r,q"); - pq("p,q") = ref_orb_b("b,p") * cp_factors[3]("q,b"); - An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + pq("p,q") = ref_orb_d("b,p") * cp_factors[3]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_c("a,p"); // TODO check to see if the Cholesky will fail. If it does // use SVD @@ -292,7 +308,7 @@ class CP_THC_ALS : public CP { this->normalize_factor(An); cp_factors[2] = An; this->partial_grammian[2]("r,rp") = An("r,n") * An("rp,n"); - pq("p,q") = ref_orb_a("a,p") * An("q,a"); + pq("p,q") = ref_orb_c("a,p") * An("q,a"); THC_times_CPD[1] = pq; } @@ -301,7 +317,7 @@ class CP_THC_ALS : public CP { { DistArray Bn; Bn = DistArray(); - Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_b("b,p"); + Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_d("b,p"); this->MTtKRP = Bn; @@ -315,7 +331,7 @@ class CP_THC_ALS : public CP { this->normalize_factor(Bn); cp_factors[3] = Bn; this->partial_grammian[3]("r,rp") = Bn("r,n") * Bn("rp,n"); - THC_times_CPD[1]("p,q") *= ref_orb_b("b,p") * Bn("q,b"); + THC_times_CPD[1]("p,q") *= ref_orb_d("b,p") * Bn("q,b"); } } }; From fcdd1a6a23b130c8eac6e6bfee757162a879b8f4 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Sun, 23 Feb 2025 14:04:08 -0500 Subject: [PATCH 10/21] Update CP to not force lambda into one of the factors --- src/TiledArray/math/solvers/cp/cp.h | 56 ++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index dcde333055..0094e4fc9d 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -37,6 +37,56 @@ static inline char intToAlphabet(int i) { return static_cast('a' + i); } } // namespace detail +/// normalizes "columns" (aka rows) of an updated factor matrix + +/// rows of factor matrices produced by least-squares are not unit +/// normalized. This takes each row and makes it unit normalized, +/// with inverse of the normalization factor stored in this->lambda +/// \param[in,out] factor in: unnormalized factor matrix, out: +/// normalized factor matrix +template +void normalize_factor(Array& factor, Array & lambda) { + using Tile = typename Array::value_type; + auto& world = factor.world(); + // this is what the code should look like, but expressions::einsum seems to + // be buggy lambda contains squared norms of rows + lambda = expressions::einsum(factor("r,n"), factor("r,n"), "r"); + + // element-wise square root to convert squared norms to norms + TiledArray::foreach_inplace( + lambda, + [](Tile& tile) { + auto lo = tile.range().lobound_data(); + auto up = tile.range().upbound_data(); + for (auto R = lo[0]; R < up[0]; ++R) { + const auto norm_squared_RR = tile({R}); + using std::sqrt; + tile({R}) = sqrt(norm_squared_RR); + } + }, + /* fence = */ true); + lambda.truncate(); + lambda.make_replicated(); + auto lambda_eig = array_to_eigen(lambda); + + TiledArray::foreach_inplace( + factor, + [&lambda_eig](Tile& tile) { + auto lo = tile.range().lobound_data(); + auto up = tile.range().upbound_data(); + for (auto R = lo[0]; R < up[0]; ++R) { + const auto lambda_R = lambda_eig(R, 0); + if (lambda_R < 1e-12) continue; + auto scale_by = 1.0 / lambda_R; + for (auto N = lo[1]; N < up[1]; ++N) { + tile(R, N) *= scale_by; + } + } + }, + /* fence = */ true); + factor.truncate(); +} + /** * This is a base class for the canonical polyadic (CP) * decomposition solver. The decomposition, in general, @@ -140,10 +190,14 @@ class CP { return epsilon; } - std::vector get_factor_matrices() { + std::vector get_factor_matrices(bool with_lambda = false) { TA_ASSERT(!cp_factors.empty(), "CP factor matrices have not been computed)"); auto result = cp_factors; + if(with_lambda){ + result.emplace_back(lambda); + return result; + } result.pop_back(); result.emplace_back(unNormalized_Factor); return result; From 8b0465ae7b6dbb1001c2ad96870ca65a7b8d32de Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Fri, 28 Feb 2025 11:18:14 -0500 Subject: [PATCH 11/21] Return the correct error --- src/TiledArray/math/solvers/cp/cp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index 0094e4fc9d..d09e05fc04 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -159,7 +159,7 @@ class CP { build_guess(rank, rank_trange); ALS(rank, niters, verbose); } - return epsilon; + return this->final_fit; } /// This function computes the CP decomposition with an From 7adc80561d3456927ad7a7a33c00bf19feb46729 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 13 Mar 2025 17:50:07 -0400 Subject: [PATCH 12/21] fix btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index 1ce2c09cca..29f454f8ad 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -17,7 +17,7 @@ set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) set(TA_TRACKED_BTAS_TAG 26646416e5f5829dc13d0d97fb15ae5c01b78e82) -set(TA_TRACKED_BTAS_PREVIOUS_TAG 4b3757cc2b5862f93589afc1e37523e543779c7a) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 1cfcb12647c768ccd83b098c64cda723e1275e49) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83) From c3ef59e03b16c5d82b5e3c0a8f15a152b1085c6d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 13 Mar 2025 18:59:49 -0400 Subject: [PATCH 13/21] Add a tests for the new CP solver --- tests/cp.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/cp.cpp b/tests/cp.cpp index 24cffe24d7..efb53c5353 100644 --- a/tests/cp.cpp +++ b/tests/cp.cpp @@ -88,6 +88,8 @@ struct CPFixture : public TiledRangeFixture { TSpArrayI a_sparse; }; +// right now this just works for contracting two order 3 tensors which form a THC like +// order 4 tensor. template TArrayT compute_cp(const TArrayT& T, size_t cp_rank, bool verbose = false) { CP_ALS CPD(T); @@ -95,6 +97,23 @@ TArrayT compute_cp(const TArrayT& T, size_t cp_rank, bool verbose = false) { return CPD.reconstruct(); } +template +TArrayT compute_thc_cp(const TArrayT& T, size_t cp_rank, size_t cp_thc_rank, bool verbose = true) { + CP_ALS CPD(T); + CPD.compute_rank(cp_rank, rank_tile_size, false, 1e-3, verbose); + auto factors = CPD.get_factor_matrices(); + + TArrayT core; + core("p,q") = factors[2]("p,X") * factors[2]("q,X"); + + factors[0]("A,r") = factors[0]("r,A"); + factors[1]("A,r") = factors[1]("r,A"); + auto CPTHC = CP_THC_ALS(factors[0], factors[1], core); + + CPTHC.compute_rank(cp_thc_rank, rank_tile_size, false, 1e-3, verbose); + return CPTHC.reconstruct(); +} + enum class Fill { Random, Constant }; template (tr3); + size_t cp_rank = 1; + auto b_cp = compute_thc_cp(b_dense, cp_rank, cp_rank, verbose); + TArrayD diff; + diff("a,b,c,d") = (b_dense("a,b,X") * b_dense("c,d,X")) - b_cp("a,b,c,d"); + bool accurate = (TA::norm2(diff) / TA::norm2(b_dense) < 1e-1); + BOOST_CHECK(accurate); + } + // sparse test // order-3 test { @@ -417,6 +447,17 @@ BOOST_AUTO_TEST_CASE(ta_cp_als) { TSpArrayD diff; diff("a,b,c,d,e") = b_sparse("a,b,c,d,e") - b_cp("a,b,c,d,e"); bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < target_rel_error); +// BOOST_CHECK(accurate); + } + + // test the THC like CPD solver + { + auto b_sparse = make(tr3); + size_t cp_rank = 77; + auto b_cp = compute_thc_cp(b_sparse, cp_rank, 5 * cp_rank, verbose); + TSpArrayD diff; + diff("a,b,c,d") = (b_sparse("a,b,X") * b_sparse("c,d,X")) - b_cp("a,b,c,d"); + bool accurate = (TA::norm2(diff) / TA::norm2(b_sparse) < 1e-1); BOOST_CHECK(accurate); } } From 6558ff8945a272592a7d3960efe0f7232aa9ec31 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 17 Mar 2025 16:22:40 -0400 Subject: [PATCH 14/21] Make it possible to set factor matrices instead of computing new ones --- src/TiledArray/math/solvers/cp/cp_als.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/TiledArray/math/solvers/cp/cp_als.h b/src/TiledArray/math/solvers/cp/cp_als.h index 4f44fa4aa1..1bcec870ad 100644 --- a/src/TiledArray/math/solvers/cp/cp_als.h +++ b/src/TiledArray/math/solvers/cp/cp_als.h @@ -72,12 +72,18 @@ class CP_ALS : public CP { this->norm_ref_sq = this->norm_reference * this->norm_reference; } + void set_factor_matrices(std::vector> & factors){ + cp_factors = factors; + factors_set = true; + } + protected: const DistArray& reference; madness::World& world; std::string ref_indices, first_gemm_dim_one, first_gemm_dim_last; std::vector lambda; TiledRange1 rank_trange1; + bool factors_set = false; /// This function constructs the initial CP facotr matrices /// stores them in CP::cp_factors vector. @@ -94,7 +100,10 @@ class CP_ALS : public CP { rank_trange, reference.trange().data()[i]); cp_factors.emplace_back(factor); } - } else { + }else if(factors_set){ + // Do nothing and don't throw an error. + } + else { TA_EXCEPTION("Currently no implementation to increase or change rank"); } From 297c302481646af4acd3a9ff831fc6881ee9dcdb Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Fri, 21 Mar 2025 14:34:57 -0400 Subject: [PATCH 15/21] lapack throws a std::exception not a runtime_error This catch was being ignored when cholesky_solve fails --- src/TiledArray/math/solvers/cp/cp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index d09e05fc04..0bb6df8c4c 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -314,7 +314,7 @@ class CP { // MtKRP); try { MtKRP = math::linalg::cholesky_solve(W, MtKRP); - } catch (std::runtime_error& ex) { + } catch (std::exception& ex){ // if W is near-singular try LU instead of Cholesky if (std::string(ex.what()).find("lapack::posv failed") != std::string::npos) { From 63e0bcd155dc8146b0fec4997de194f0bfa40a4d Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 1 Apr 2025 17:43:50 -0400 Subject: [PATCH 16/21] Use default world instead of provided world --- src/TiledArray/math/solvers/cp/cp_thc_als.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/math/solvers/cp/cp_thc_als.h b/src/TiledArray/math/solvers/cp/cp_thc_als.h index 106a1c79e9..1b87af7094 100644 --- a/src/TiledArray/math/solvers/cp/cp_thc_als.h +++ b/src/TiledArray/math/solvers/cp/cp_thc_als.h @@ -57,7 +57,7 @@ class CP_THC_ALS : public CP { // for now I am going to assume an order-4 THC but later this will be used for // arbitrary order. CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& tref3) - : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(tref1.world()), + : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(get_default_world()), ref_orb_c(tref1), ref_orb_d(tref2) { DistArray pr, pq; @@ -70,7 +70,7 @@ class CP_THC_ALS : public CP { CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& core, const DistArray& tref3, const DistArray& tref4) - : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(core), world(tref1.world()), + : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(core), world(get_default_world()), ref_orb_c(tref3), ref_orb_d(tref4) { DistArray pr, pq; // I need two things that are r_{ai} x r_{ab} which I am calling p x q From b140892116db79ee0c7b60892430d196721418a2 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 27 May 2025 11:26:43 +0900 Subject: [PATCH 17/21] Bump btas tag --- external/versions.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/versions.cmake b/external/versions.cmake index ba545a05c8..ea5eb95a8f 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -17,7 +17,7 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG bd84a52766ab497dedc2f15f2162fb0eb7ec4653) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 7daa03676f34f08cb2af38382a0c3e79eb2b3b49) +set(TA_TRACKED_BTAS_TAG 74dd9277c71043c564f931c9d1f07842548d2349) set(TA_TRACKED_BTAS_PREVIOUS_TAG 26646416e5f5829dc13d0d97fb15ae5c01b78e82) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) From 854b2caf0bbdce583348acec6c0ed4cb3c7be704 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 23 Jun 2025 15:39:02 -0400 Subject: [PATCH 18/21] Format --- src/TiledArray/math/solvers/cp/cp_thc_als.h | 662 ++++++++++---------- 1 file changed, 339 insertions(+), 323 deletions(-) diff --git a/src/TiledArray/math/solvers/cp/cp_thc_als.h b/src/TiledArray/math/solvers/cp/cp_thc_als.h index 1b87af7094..b2479c8dd5 100644 --- a/src/TiledArray/math/solvers/cp/cp_thc_als.h +++ b/src/TiledArray/math/solvers/cp/cp_thc_als.h @@ -1,339 +1,355 @@ /* -* This file is a part of TiledArray. -* Copyright (C) 2023 Virginia Tech -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -* -* Karl Pierce -* Department of Chemistry, Virginia Tech -* -* cp.h -* April 17, 2022 -* -*/ + * This file is a part of TiledArray. + * Copyright (C) 2023 Virginia Tech + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Karl Pierce + * Department of Chemistry, Virginia Tech + * + * cp.h + * April 17, 2022 + * + */ #ifndef TILEDARRAY_MATH_SOLVERS_CP_CP_THC_ALS__H #define TILEDARRAY_MATH_SOLVERS_CP_CP_THC_ALS__H -#include #include +#include #include namespace TiledArray::math::cp { /** -* This is a canonical polyadic (CP) optimization class which -* takes a reference order-N tensor that is expressed in the THC format -* and decomposes it into a set of order-2 tensors all coupled by -* a hyperdimension called the rank. These factors are optimized -* using an alternating least squares algorithm. -* -* @tparam Tile typing for the DistArray tiles -* @tparam Policy policy of the DistArray -**/ + * This is a canonical polyadic (CP) optimization class which + * takes a reference order-N tensor that is expressed in the THC format + * and decomposes it into a set of order-2 tensors all coupled by + * a hyperdimension called the rank. These factors are optimized + * using an alternating least squares algorithm. + * + * @tparam Tile typing for the DistArray tiles + * @tparam Policy policy of the DistArray + **/ template class CP_THC_ALS : public CP { -public: - using CP::ndim; - using CP::cp_factors; - - /// Default CP_ALS constructor - CP_THC_ALS() = default; - - /// CP_ALS constructor function - /// takes, as a constant reference, the tensor to be decomposed - /// \param[in] tref A constant reference to the tensor to be decomposed. - // for now I am going to assume an order-4 THC but later this will be used for - // arbitrary order. - CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& tref3) - : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(tref3), world(get_default_world()), - ref_orb_c(tref1), ref_orb_d(tref2) { - - DistArray pr, pq; - pr("r,rp") = (ref_orb_a("a,r") * ref_orb_a("a,rp")) * (ref_orb_b("i,r") * ref_orb_b("i,rp")); - pq("p,q") = ref_core("p,r") * pr("r,rp") * ref_core("q,rp"); - this->norm_ref_sq = pq("r,rp").dot(pr("r,rp")).get(); - this->norm_reference = sqrt(this->norm_ref_sq); - symmetric = true; - } - - CP_THC_ALS(const DistArray& tref1, const DistArray& tref2, const DistArray& core, - const DistArray& tref3, const DistArray& tref4) - : CP(2 * rank(tref3)), ref_orb_a(tref1), ref_orb_b(tref2), ref_core(core), world(get_default_world()), - ref_orb_c(tref3), ref_orb_d(tref4) { - DistArray pr, pq; - // I need two things that are r_{ai} x r_{ab} which I am calling p x q - pr("p,r") = ((ref_orb_a("a,p") * ref_orb_a("a,r")) * (ref_orb_b("i,p") * ref_orb_b("i,r"))); - pq("p,q") = pr("p,r") * ref_core("r,q"); - pr("p,q") = ref_core("p,r") * ((ref_orb_c("b,r") * ref_orb_c("b,q")) * (ref_orb_d("j,r") * ref_orb_d("j,q"))); - this->norm_ref_sq = pq("p,q").dot(pr("p,q")).get(); - this->norm_reference = sqrt(this->norm_ref_sq); - } - - // This will just assume you've done the correct thing. If you haven't - // an error will be thrown somewhere else. - void set_factor_matrices(std::vector> & factors){ - cp_factors = factors; - factors_set = true; - } - -protected: - const DistArray& ref_orb_a, ref_orb_b, ref_core, ref_orb_c, ref_orb_d; - madness::World& world; - std::vector lambda; - std::vector> THC_times_CPD; - TiledRange1 rank_trange1; - size_t size_of_core; - bool factors_set = false; - bool symmetric = false; - - /// This function constructs the initial CP factor matrices - /// stores them in CP::cp_factors vector. - /// In general the initial guess is constructed using quasi-random numbers - /// generated between [-1, 1] - /// \param[in] rank rank of the CP approximation - /// \param[in] rank_trange TiledRange1 of the rank dimension. - void build_guess(const size_t rank, const TiledRange1 rank_trange) override { - rank_trange1 = rank_trange; - if (cp_factors.size() == 0) { - cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_a.trange().elements_range().extent(0), - rank_trange, ref_orb_a.trange().data()[0])); - cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_b.trange().elements_range().extent(0), - rank_trange, ref_orb_b.trange().data()[0])); - cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_c.trange().elements_range().extent(0), - rank_trange, ref_orb_c.trange().data()[0])); - cp_factors.emplace_back(this->construct_random_factor( - world, rank, ref_orb_d.trange().elements_range().extent(0), - rank_trange, ref_orb_d.trange().data()[0])); - } else if(factors_set) { - // Do nothing and don't throw an error. - }else { - TA_EXCEPTION("Currently no implementation to increase or change rank"); - } - - return; - } - - /// This function is specified by the CP solver - /// optimizes the rank @c rank CP approximation - /// stored in cp_factors. - /// \param[in] rank rank of the CP approximation - /// \param[in] max_iter max number of ALS iterations - /// \param[in] verbose Should ALS print fit information while running? - void ALS(size_t rank, size_t max_iter, bool verbose = false) override { - size_t iter = 0; - bool converged = false; - auto nthc = TA::rank(ref_core); - // initialize partial grammians - { - auto ptr = this->partial_grammian.begin(); - for (auto& i : cp_factors) { - (*ptr)("r,rp") = i("r,n") * i("rp, n"); - ++ptr; - } - DistArray pq; - pq("p,q") = ref_orb_c("a,p") * cp_factors[2]("q,a"); - THC_times_CPD.emplace_back(pq); - pq("p,q") *= ref_orb_d("b,p") * cp_factors[3]("q,b"); - pq.truncate(); - THC_times_CPD.emplace_back(pq); - - } -// auto factor_begin = cp_factors.data(), -// gram_begin = this->partial_grammian.data(); -// DistArray abr, tref; -// abr = einsum(ref_orb_a("a,r"), ref_orb_b("b,r"), "a,b,r"); -// tref("a,b,c,d") = abr("a,b,r") * ref_core("r,rp") * abr("c,d,rp"); -// -// std::cout << "Norm2 : " << norm2(tref) << std::endl; -// std::cout << "set: " << this->norm_reference << std::endl; - do { - update_factors_left(); - update_factors_right(); - converged = this->check_fit(verbose); - // for (auto i = 0; i < nthc; ++i) { - // update_factor(i, rank); - // } - ++iter; - } while (iter < max_iter && !converged); - } - - void update_factor(size_t mode, size_t rank){ - - size_t pos = 2 * mode, pos_plus_one = pos + 1; - // going through the core 0 is associated with factors 0 and 1 - // core 1 associated with factors 2 and 3 ... - // First we need to take other side of the problem and contract it with the core - // if core is greater than 2 I need to contract all the centers but - // here there's only one so just do one contract - size_t other_side = (mode + 1) % 2; - DistArray env, pq, W_env, W; - { - DistArray An; - env("p,q") = ref_core("p,r") * THC_times_CPD[other_side]("r,q"); - - pq("p,q") = ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); - An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - DistArray W; - other_side *= 2; - W_env("p,q") = this->partial_grammian[other_side]("p,q") * - this->partial_grammian[other_side + 1]("p,q"); - W("p,q") = this->partial_grammian[pos_plus_one]("p,q") * W_env("p,q"); - - this->cholesky_inverse(An, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->normalize_factor(An); - cp_factors[pos] = An; - auto& gram = this->partial_grammian[pos]; - gram("r,rp") = An("r,n") * An("rp,n"); - pq("p,q") = ref_orb_a("a,p") * cp_factors[pos]("q,a"); - THC_times_CPD[mode] = pq; - } - - // Finished with the first factor in THC. - // Starting the second factor - { - DistArray Bn; - Bn = DistArray(); - Bn("q,b") = einsum(pq("p,q"), env("p,q"), "p,q")("p,q") * ref_orb_b("b,p"); - - this->MTtKRP = Bn; - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - W("p,q") = this->partial_grammian[pos]("p,q") * W_env("p,q"); - this->cholesky_inverse(Bn, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->unNormalized_Factor = Bn.clone(); - this->normalize_factor(Bn); - cp_factors[pos_plus_one] = Bn; - auto& gram = this->partial_grammian[pos_plus_one]; - gram("r,rp") = Bn("r,n") * Bn("rp,n"); - THC_times_CPD[mode]("p,q") *= ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); - } - } - void update_factors_left(){ - - // going through the core 0 is associated with factors 0 and 1 - // core 1 associated with factors 2 and 3 ... - // First we need to take other side of the problem and contract it with the core - // if core is greater than 2 I need to contract all the centers but - // here there's only one so just do one contract - DistArray env, pq, W_env, W; - { - DistArray An; - env("p,q") = ref_core("p,r") * THC_times_CPD[1]("r,q"); - - pq("p,q") = ref_orb_b("b,p") * cp_factors[1]("q,b"); - An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - W_env("p,q") = this->partial_grammian[2]("p,q") * - this->partial_grammian[3]("p,q"); - W("p,q") = this->partial_grammian[1]("p,q") * W_env("p,q"); - - this->cholesky_inverse(An, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->normalize_factor(An); - cp_factors[0] = An; - this->partial_grammian[0]("r,rp") = An("r,n") * An("rp,n"); - pq("p,q") = ref_orb_a("a,p") * An("q,a"); - THC_times_CPD[0] = pq; - } - - // Finished with the first factor in THC. - // Starting the second factor - { - DistArray Bn; - Bn = DistArray(); - Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_b("b,p"); - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - W("p,q") = this->partial_grammian[0]("p,q") * W_env("p,q"); - this->cholesky_inverse(Bn, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->normalize_factor(Bn); - cp_factors[1] = Bn; - this->partial_grammian[1]("r,rp") = Bn("r,n") * Bn("rp,n"); - THC_times_CPD[0]("p,q") *= ref_orb_b("b,p") * Bn("q,b"); - } - } - void update_factors_right(){ - - // going through the core 0 is associated with factors 0 and 1 - // core 1 associated with factors 2 and 3 ... - // First we need to take other side of the problem and contract it with the core - // if core is greater than 2 I need to contract all the centers but - // here there's only one so just do one contract - DistArray env, pq, W_env, W; - { - DistArray An; - env("p,q") = ref_core("r,p") * THC_times_CPD[0]("r,q"); - - pq("p,q") = ref_orb_d("b,p") * cp_factors[3]("q,b"); - An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_c("a,p"); - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - W_env("p,q") = this->partial_grammian[0]("p,q") * - this->partial_grammian[1]("p,q"); - W("p,q") = this->partial_grammian[3]("p,q") * W_env("p,q"); - - this->cholesky_inverse(An, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->normalize_factor(An); - cp_factors[2] = An; - this->partial_grammian[2]("r,rp") = An("r,n") * An("rp,n"); - pq("p,q") = ref_orb_c("a,p") * An("q,a"); - THC_times_CPD[1] = pq; - } - - // Finished with the first factor in THC. - // Starting the second factor - { - DistArray Bn; - Bn = DistArray(); - Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_d("b,p"); - - this->MTtKRP = Bn; - - // TODO check to see if the Cholesky will fail. If it does - // use SVD - W("p,q") = this->partial_grammian[2]("p,q") * W_env("p,q"); - this->cholesky_inverse(Bn, W); - world.gop.fence(); // N.B. seems to deadlock without this - - this->unNormalized_Factor = Bn.clone(); - this->normalize_factor(Bn); - cp_factors[3] = Bn; - this->partial_grammian[3]("r,rp") = Bn("r,n") * Bn("rp,n"); - THC_times_CPD[1]("p,q") *= ref_orb_d("b,p") * Bn("q,b"); - } - } + public: + using CP::ndim; + using CP::cp_factors; + + /// Default CP_ALS constructor + CP_THC_ALS() = default; + + /// CP_ALS constructor function + /// takes, as a constant reference, the tensor to be decomposed + /// \param[in] tref A constant reference to the tensor to be decomposed. + // for now I am going to assume an order-4 THC but later this will be used for + // arbitrary order. + CP_THC_ALS(const DistArray& tref1, + const DistArray& tref2, + const DistArray& tref3) + : CP(2 * rank(tref3)), + ref_orb_a(tref1), + ref_orb_b(tref2), + ref_core(tref3), + world(get_default_world()), + ref_orb_c(tref1), + ref_orb_d(tref2) { + DistArray pr, pq; + pr("r,rp") = (ref_orb_a("a,r") * ref_orb_a("a,rp")) * + (ref_orb_b("i,r") * ref_orb_b("i,rp")); + pq("p,q") = ref_core("p,r") * pr("r,rp") * ref_core("q,rp"); + this->norm_ref_sq = pq("r,rp").dot(pr("r,rp")).get(); + this->norm_reference = sqrt(this->norm_ref_sq); + symmetric = true; + } + + CP_THC_ALS(const DistArray& tref1, + const DistArray& tref2, + const DistArray& core, + const DistArray& tref3, + const DistArray& tref4) + : CP(2 * rank(tref3)), + ref_orb_a(tref1), + ref_orb_b(tref2), + ref_core(core), + world(get_default_world()), + ref_orb_c(tref3), + ref_orb_d(tref4) { + DistArray pr, pq; + // I need two things that are r_{ai} x r_{ab} which I am calling p x q + pr("p,r") = ((ref_orb_a("a,p") * ref_orb_a("a,r")) * + (ref_orb_b("i,p") * ref_orb_b("i,r"))); + pq("p,q") = pr("p,r") * ref_core("r,q"); + pr("p,q") = ref_core("p,r") * ((ref_orb_c("b,r") * ref_orb_c("b,q")) * + (ref_orb_d("j,r") * ref_orb_d("j,q"))); + this->norm_ref_sq = pq("p,q").dot(pr("p,q")).get(); + this->norm_reference = sqrt(this->norm_ref_sq); + } + + // This will just assume you've done the correct thing. If you haven't + // an error will be thrown somewhere else. + void set_factor_matrices(std::vector>& factors) { + cp_factors = factors; + factors_set = true; + } + + protected: + const DistArray&ref_orb_a, ref_orb_b, ref_core, ref_orb_c, + ref_orb_d; + madness::World& world; + std::vector lambda; + std::vector> THC_times_CPD; + TiledRange1 rank_trange1; + size_t size_of_core; + bool factors_set = false; + bool symmetric = false; + + /// This function constructs the initial CP factor matrices + /// stores them in CP::cp_factors vector. + /// In general the initial guess is constructed using quasi-random numbers + /// generated between [-1, 1] + /// \param[in] rank rank of the CP approximation + /// \param[in] rank_trange TiledRange1 of the rank dimension. + void build_guess(const size_t rank, const TiledRange1 rank_trange) override { + rank_trange1 = rank_trange; + if (cp_factors.size() == 0) { + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_a.trange().elements_range().extent(0), + rank_trange, ref_orb_a.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_b.trange().elements_range().extent(0), + rank_trange, ref_orb_b.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_c.trange().elements_range().extent(0), + rank_trange, ref_orb_c.trange().data()[0])); + cp_factors.emplace_back(this->construct_random_factor( + world, rank, ref_orb_d.trange().elements_range().extent(0), + rank_trange, ref_orb_d.trange().data()[0])); + } else if (factors_set) { + // Do nothing and don't throw an error. + } else { + TA_EXCEPTION("Currently no implementation to increase or change rank"); + } + + return; + } + + /// This function is specified by the CP solver + /// optimizes the rank @c rank CP approximation + /// stored in cp_factors. + /// \param[in] rank rank of the CP approximation + /// \param[in] max_iter max number of ALS iterations + /// \param[in] verbose Should ALS print fit information while running? + void ALS(size_t rank, size_t max_iter, bool verbose = false) override { + size_t iter = 0; + bool converged = false; + auto nthc = TA::rank(ref_core); + // initialize partial grammians + { + auto ptr = this->partial_grammian.begin(); + for (auto& i : cp_factors) { + (*ptr)("r,rp") = i("r,n") * i("rp, n"); + ++ptr; + } + DistArray pq; + pq("p,q") = ref_orb_c("a,p") * cp_factors[2]("q,a"); + THC_times_CPD.emplace_back(pq); + pq("p,q") *= ref_orb_d("b,p") * cp_factors[3]("q,b"); + pq.truncate(); + THC_times_CPD.emplace_back(pq); + } + // auto factor_begin = cp_factors.data(), + // gram_begin = this->partial_grammian.data(); + // DistArray abr, tref; + // abr = einsum(ref_orb_a("a,r"), ref_orb_b("b,r"), "a,b,r"); + // tref("a,b,c,d") = abr("a,b,r") * ref_core("r,rp") * abr("c,d,rp"); + // + // std::cout << "Norm2 : " << norm2(tref) << std::endl; + // std::cout << "set: " << this->norm_reference << std::endl; + do { + update_factors_left(); + update_factors_right(); + converged = this->check_fit(verbose); + // for (auto i = 0; i < nthc; ++i) { + // update_factor(i, rank); + // } + ++iter; + } while (iter < max_iter && !converged); + } + + void update_factor(size_t mode, size_t rank) { + size_t pos = 2 * mode, pos_plus_one = pos + 1; + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the + // core if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + size_t other_side = (mode + 1) % 2; + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("p,r") * THC_times_CPD[other_side]("r,q"); + + pq("p,q") = ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + DistArray W; + other_side *= 2; + W_env("p,q") = this->partial_grammian[other_side]("p,q") * + this->partial_grammian[other_side + 1]("p,q"); + W("p,q") = this->partial_grammian[pos_plus_one]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[pos] = An; + auto& gram = this->partial_grammian[pos]; + gram("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_a("a,p") * cp_factors[pos]("q,a"); + THC_times_CPD[mode] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = + einsum(pq("p,q"), env("p,q"), "p,q")("p,q") * ref_orb_b("b,p"); + + this->MTtKRP = Bn; + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[pos]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->unNormalized_Factor = Bn.clone(); + this->normalize_factor(Bn); + cp_factors[pos_plus_one] = Bn; + auto& gram = this->partial_grammian[pos_plus_one]; + gram("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[mode]("p,q") *= + ref_orb_b("b,p") * cp_factors[pos_plus_one]("q,b"); + } + } + void update_factors_left() { + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the + // core if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("p,r") * THC_times_CPD[1]("r,q"); + + pq("p,q") = ref_orb_b("b,p") * cp_factors[1]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_a("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W_env("p,q") = + this->partial_grammian[2]("p,q") * this->partial_grammian[3]("p,q"); + W("p,q") = this->partial_grammian[1]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[0] = An; + this->partial_grammian[0]("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_a("a,p") * An("q,a"); + THC_times_CPD[0] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_b("b,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[0]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(Bn); + cp_factors[1] = Bn; + this->partial_grammian[1]("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[0]("p,q") *= ref_orb_b("b,p") * Bn("q,b"); + } + } + void update_factors_right() { + // going through the core 0 is associated with factors 0 and 1 + // core 1 associated with factors 2 and 3 ... + // First we need to take other side of the problem and contract it with the + // core if core is greater than 2 I need to contract all the centers but + // here there's only one so just do one contract + DistArray env, pq, W_env, W; + { + DistArray An; + env("p,q") = ref_core("r,p") * THC_times_CPD[0]("r,q"); + + pq("p,q") = ref_orb_d("b,p") * cp_factors[3]("q,b"); + An("q,a") = (pq("p,q") * env("p,q")) * ref_orb_c("a,p"); + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W_env("p,q") = + this->partial_grammian[0]("p,q") * this->partial_grammian[1]("p,q"); + W("p,q") = this->partial_grammian[3]("p,q") * W_env("p,q"); + + this->cholesky_inverse(An, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->normalize_factor(An); + cp_factors[2] = An; + this->partial_grammian[2]("r,rp") = An("r,n") * An("rp,n"); + pq("p,q") = ref_orb_c("a,p") * An("q,a"); + THC_times_CPD[1] = pq; + } + + // Finished with the first factor in THC. + // Starting the second factor + { + DistArray Bn; + Bn = DistArray(); + Bn("q,b") = (pq("p,q") * env("p,q")) * ref_orb_d("b,p"); + + this->MTtKRP = Bn; + + // TODO check to see if the Cholesky will fail. If it does + // use SVD + W("p,q") = this->partial_grammian[2]("p,q") * W_env("p,q"); + this->cholesky_inverse(Bn, W); + world.gop.fence(); // N.B. seems to deadlock without this + + this->unNormalized_Factor = Bn.clone(); + this->normalize_factor(Bn); + cp_factors[3] = Bn; + this->partial_grammian[3]("r,rp") = Bn("r,n") * Bn("rp,n"); + THC_times_CPD[1]("p,q") *= ref_orb_d("b,p") * Bn("q,b"); + } + } }; } // namespace TiledArray::math::cp From c5a601a1730800cb9f2500e4e8c897011fca3fa2 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 23 Jun 2025 15:43:09 -0400 Subject: [PATCH 19/21] format --- src/TiledArray/math/solvers/cp/cp.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/TiledArray/math/solvers/cp/cp.h b/src/TiledArray/math/solvers/cp/cp.h index 0bb6df8c4c..2faa54cdcc 100644 --- a/src/TiledArray/math/solvers/cp/cp.h +++ b/src/TiledArray/math/solvers/cp/cp.h @@ -44,8 +44,8 @@ static inline char intToAlphabet(int i) { return static_cast('a' + i); } /// with inverse of the normalization factor stored in this->lambda /// \param[in,out] factor in: unnormalized factor matrix, out: /// normalized factor matrix -template -void normalize_factor(Array& factor, Array & lambda) { +template +void normalize_factor(Array& factor, Array& lambda) { using Tile = typename Array::value_type; auto& world = factor.world(); // this is what the code should look like, but expressions::einsum seems to @@ -194,7 +194,7 @@ class CP { TA_ASSERT(!cp_factors.empty(), "CP factor matrices have not been computed)"); auto result = cp_factors; - if(with_lambda){ + if (with_lambda) { result.emplace_back(lambda); return result; } @@ -314,7 +314,7 @@ class CP { // MtKRP); try { MtKRP = math::linalg::cholesky_solve(W, MtKRP); - } catch (std::exception& ex){ + } catch (std::exception& ex) { // if W is near-singular try LU instead of Cholesky if (std::string(ex.what()).find("lapack::posv failed") != std::string::npos) { @@ -433,9 +433,8 @@ class CP { }; // compute the error in the loss function and find the fit const auto norm_cp = factor_norm(); // ||T_CP||_2 - const auto squared_norm_error = norm_ref_sq + - norm_cp - - 2.0 * ref_dot_cp; // ||T - T_CP||_2^2 + const auto squared_norm_error = + norm_ref_sq + norm_cp - 2.0 * ref_dot_cp; // ||T - T_CP||_2^2 // N.B. squared_norm_error is very noisy // TA_ASSERT(squared_norm_error >= - 1e-8); const auto norm_error = sqrt(abs(squared_norm_error)); From 20268329c6436baf705f0b0930ffbc982d7dbb6b Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 23 Jun 2025 15:45:03 -0400 Subject: [PATCH 20/21] format --- src/TiledArray/math/solvers/cp/cp_als.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/TiledArray/math/solvers/cp/cp_als.h b/src/TiledArray/math/solvers/cp/cp_als.h index 1bcec870ad..2ab88d0c8b 100644 --- a/src/TiledArray/math/solvers/cp/cp_als.h +++ b/src/TiledArray/math/solvers/cp/cp_als.h @@ -72,7 +72,7 @@ class CP_ALS : public CP { this->norm_ref_sq = this->norm_reference * this->norm_reference; } - void set_factor_matrices(std::vector> & factors){ + void set_factor_matrices(std::vector>& factors) { cp_factors = factors; factors_set = true; } @@ -100,10 +100,9 @@ class CP_ALS : public CP { rank_trange, reference.trange().data()[i]); cp_factors.emplace_back(factor); } - }else if(factors_set){ + } else if (factors_set) { // Do nothing and don't throw an error. - } - else { + } else { TA_EXCEPTION("Currently no implementation to increase or change rank"); } From ddd6e34d19e8c9fd3aaf937f6c84f269e44e287b Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 4 Sep 2025 15:39:00 -0400 Subject: [PATCH 21/21] Bump BTAS tag --- external/versions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/external/versions.cmake b/external/versions.cmake index 1652142209..0a9586e072 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -17,8 +17,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG bd84a52766ab497dedc2f15f2162fb0eb7ec4653) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) -set(TA_TRACKED_BTAS_TAG 74dd9277c71043c564f931c9d1f07842548d2349) -set(TA_TRACKED_BTAS_PREVIOUS_TAG 26646416e5f5829dc13d0d97fb15ae5c01b78e82) +set(TA_TRACKED_BTAS_TAG 9c8c8f68fee2b82e64755270a8348e4612cf9941) +set(TA_TRACKED_BTAS_PREVIOUS_TAG 74dd9277c71043c564f931c9d1f07842548d2349) set(TA_TRACKED_LIBRETT_TAG 6eed30d4dd2a5aa58840fe895dcffd80be7fbece) set(TA_TRACKED_LIBRETT_PREVIOUS_TAG 354e0ccee54aeb2f191c3ce2c617ebf437e49d83)