[hist] Implement initial RVariableBinAxis

hahnjo · hahnjo · commit 37f25cf8c1e9 · 2025-07-16T09:43:07.000+02:00
As for the initial version of RRegularAxis, this first commit only
exposes ComputeLinearizedIndex with a simple linear search.
diff --git a/hist/histv7/CMakeLists.txt b/hist/histv7/CMakeLists.txt
@@ -2,6 +2,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTHist
   HEADERS
     ROOT/RLinearizedIndex.hxx
     ROOT/RRegularAxis.hxx
+    ROOT/RVariableBinAxis.hxx
   NO_SOURCES
   DEPENDENCIES
     Core
diff --git a/hist/histv7/doc/DesignImplementation.md b/hist/histv7/doc/DesignImplementation.md
@@ -71,3 +71,6 @@ The implementation uses standard [C++17](https://en.cppreference.com/w/cpp/17.ht
  * No ROOT types, to make sure the histogram package can be compiled standalone.
 
 Small objects are passed by value instead of by reference (`RBinIndex`, `RWeight`).
+
+Complex objects, such as `std::vector`, that have to be copied (for example in a constructor) are also accepted by value.
+This allows a single overload that can efficiently take expiring ("moved") objects.
diff --git a/hist/histv7/inc/ROOT/RVariableBinAxis.hxx b/hist/histv7/inc/ROOT/RVariableBinAxis.hxx
@@ -0,0 +1,85 @@
+#ifndef ROOT_RVariableBinAxis
+#define ROOT_RVariableBinAxis
+
+#include "RLinearizedIndex.hxx"
+
+#include <cstddef>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+namespace ROOT {
+namespace Experimental {
+
+/**
+An axis with variable bins defined by their edges.
+
+For example, the following creates an axis with 3 log-spaced bins:
+~~~ {.cxx}
+std::vector<double> binEdges = {1, 10, 100, 1000};
+ROOT::Experimental::RVariableBinAxis axis(binEdges);
+~~~
+
+The bin edges must contain at least two values in strictly increasing order; otherwise the constructor throws
+`std::invalid_argument`.
+
+It is possible to disable underflow and overflow bins by passing `enableFlowBins = false`. In that case, arguments
+outside the axis will be silently discarded.
+*/
+class RVariableBinAxis final {
+   /// The (ordered) edges of the normal bins
+   std::vector<double> fBinEdges;
+   /// Whether underflow and overflow bins are enabled
+   bool fEnableFlowBins;
+
+public:
+   /// Construct an axis object with variable bins.
+   ///
+   /// \param[in] binEdges the edges of the normal bins; at least two values in strictly increasing order
+   /// \param[in] enableFlowBins whether to enable underflow and overflow bins
+   /// \throws std::invalid_argument if fewer than two edges are given or the edges are not strictly increasing
+   RVariableBinAxis(std::vector<double> binEdges, bool enableFlowBins = true)
+      : fBinEdges(std::move(binEdges)), fEnableFlowBins(enableFlowBins)
+   {
+      // The other members call front() / back() and compute size() - 1, which requires at least one normal bin.
+      if (fBinEdges.size() < 2) {
+         throw std::invalid_argument("must have >= 2 bin edges");
+      }
+      for (std::size_t i = 1; i < fBinEdges.size(); i++) {
+         // The negated comparison also rejects NaN edges, for which every ordering comparison is false.
+         if (!(fBinEdges[i - 1] < fBinEdges[i])) {
+            throw std::invalid_argument("binEdges must be sorted in strictly increasing order");
+         }
+      }
+   }
+
+   /// \return the number of normal bins, which is one less than the number of bin edges
+   std::size_t GetNumNormalBins() const { return fBinEdges.size() - 1; }
+   /// \return the number of normal bins, plus two for the underflow and overflow bins if they are enabled
+   std::size_t GetTotalNumBins() const { return fEnableFlowBins ? fBinEdges.size() + 1 : fBinEdges.size() - 1; }
+   /// \return the edges of the normal bins
+   const std::vector<double> &GetBinEdges() const { return fBinEdges; }
+   /// \return whether underflow and overflow bins are enabled
+   bool AreFlowBinsEnabled() const { return fEnableFlowBins; }
+
+   /// Two axes are equal if they have identical bin edges and the same setting for the flow bins.
+   friend bool operator==(const RVariableBinAxis &lhs, const RVariableBinAxis &rhs)
+   {
+      return lhs.fBinEdges == rhs.fBinEdges && lhs.fEnableFlowBins == rhs.fEnableFlowBins;
+   }
+
+   /// Compute the linearized index for a single argument.
+   ///
+   /// The normal bins have indices \f$0\f$ to \f$fBinEdges.size() - 2\f$, the underflow bin has index
+   /// \f$fBinEdges.size() - 1\f$, and the overflow bin has index \f$fBinEdges.size()\f$. If the argument is outside all
+   /// bin edges and the flow bins are disabled, the return value is invalid.
+   ///
+   /// \param[in] x the argument
+   /// \return the linearized index that may be invalid
+   RLinearizedIndex ComputeLinearizedIndex(double x) const
+   {
+      bool underflow = x < fBinEdges.front();
+      // Put NaNs into overflow bin: all comparisons with NaN are false, so the negation below is true.
+      bool overflow = !(x < fBinEdges.back());
+      if (underflow) {
+         return {fBinEdges.size() - 1, fEnableFlowBins};
+      } else if (overflow) {
+         return {fBinEdges.size(), fEnableFlowBins};
+      }
+
+      // At this point x lies in [front, back). Find the first upper edge that is larger than x.
+      // TODO (for later): The following can be optimized with binary search...
+      for (std::size_t bin = 0; bin < fBinEdges.size() - 2; bin++) {
+         if (x < fBinEdges[bin + 1]) {
+            return {bin, true};
+         }
+      }
+      // Only the last normal bin remains; its upper edge was already checked by the overflow test.
+      std::size_t bin = fBinEdges.size() - 2;
+      return {bin, true};
+   }
+};
+
+} // namespace Experimental
+} // namespace ROOT
+
+#endif
diff --git a/hist/histv7/test/CMakeLists.txt b/hist/histv7/test/CMakeLists.txt
@@ -1 +1,2 @@
 ROOT_ADD_GTEST(hist_regular hist_regular.cxx LIBRARIES ROOTHist)
+ROOT_ADD_GTEST(hist_variable hist_variable.cxx LIBRARIES ROOTHist)
diff --git a/hist/histv7/test/hist_test.hxx b/hist/histv7/test/hist_test.hxx
@@ -2,9 +2,11 @@
 #define hist_test
 
 #include <ROOT/RRegularAxis.hxx>
+#include <ROOT/RVariableBinAxis.hxx>
 
 #include "gtest/gtest.h"
 
 using ROOT::Experimental::RRegularAxis;
+using ROOT::Experimental::RVariableBinAxis;
 
 #endif
diff --git a/hist/histv7/test/hist_variable.cxx b/hist/histv7/test/hist_variable.cxx
@@ -0,0 +1,111 @@
+#include "hist_test.hxx"
+
+#include <limits>
+#include <vector>
+
+// Checks the bin counts and the flow bin setting reported after construction.
+TEST(RVariableBinAxis, Constructor)
+{
+   static constexpr std::size_t Bins = 20;
+   // The edges 0, 1, ..., Bins define Bins normal bins.
+   std::vector<double> edges;
+   for (std::size_t edge = 0; edge <= Bins; edge++) {
+      edges.push_back(edge);
+   }
+
+   // Flow bins are enabled by default and add two bins to the total.
+   RVariableBinAxis axis(edges);
+   EXPECT_EQ(axis.GetNumNormalBins(), Bins);
+   EXPECT_EQ(axis.GetTotalNumBins(), Bins + 2);
+   EXPECT_TRUE(axis.AreFlowBinsEnabled());
+
+   // Without flow bins, the total equals the number of normal bins.
+   axis = RVariableBinAxis(edges, /*enableFlowBins=*/false);
+   EXPECT_EQ(axis.GetNumNormalBins(), Bins);
+   EXPECT_EQ(axis.GetTotalNumBins(), Bins);
+   EXPECT_FALSE(axis.AreFlowBinsEnabled());
+}
+
+// Checks operator== for identical axes, differing flow bin settings, and differing bin edges.
+TEST(RVariableBinAxis, Equality)
+{
+   static constexpr std::size_t Bins = 20;
+   // Builds the edges first, first + 1, ..., last.
+   const auto makeEdges = [](std::size_t first, std::size_t last) {
+      std::vector<double> edges;
+      for (std::size_t edge = first; edge <= last; edge++) {
+         edges.push_back(edge);
+      }
+      return edges;
+   };
+
+   // A covers the full range, B only the lower half, and C only the upper half.
+   const std::vector<double> binsA = makeEdges(0, Bins);
+   const std::vector<double> binsB = makeEdges(0, Bins / 2);
+   const std::vector<double> binsC = makeEdges(Bins / 2, Bins);
+
+   const RVariableBinAxis axisA(binsA);
+   const RVariableBinAxis axisANoFlowBins(binsA, /*enableFlowBins=*/false);
+   const RVariableBinAxis axisA2(binsA);
+   const RVariableBinAxis axisB(binsB);
+   const RVariableBinAxis axisC(binsC);
+
+   // Same edges and same flow bin setting compare equal, in both directions.
+   EXPECT_TRUE(axisA == axisA);
+   EXPECT_TRUE(axisA == axisA2);
+   EXPECT_TRUE(axisA2 == axisA);
+
+   // Same edges, but a different flow bin setting.
+   EXPECT_FALSE(axisA == axisANoFlowBins);
+
+   // Different edges.
+   EXPECT_FALSE(axisA == axisB);
+   EXPECT_FALSE(axisA == axisC);
+   EXPECT_FALSE(axisB == axisC);
+}
+
+// Checks the linearized index for underflow, normal, and overflow arguments, with and without flow bins.
+TEST(RVariableBinAxis, ComputeLinearizedIndex)
+{
+   static constexpr std::size_t Bins = 20;
+   // The edges 0, 1, ..., Bins define Bins normal bins.
+   std::vector<double> edges;
+   for (std::size_t edge = 0; edge <= Bins; edge++) {
+      edges.push_back(edge);
+   }
+
+   const RVariableBinAxis axis(edges);
+   const RVariableBinAxis axisNoFlowBins(edges, /*enableFlowBins=*/false);
+
+   // Underflow: the index is Bins, and it is only valid if the flow bins are enabled.
+   static constexpr double NegativeInfinity = -std::numeric_limits<double>::infinity();
+   static constexpr double UnderflowLarge = -static_cast<double>(Bins);
+   static constexpr double UnderflowSmall = -0.1;
+   for (double x : {NegativeInfinity, UnderflowLarge, UnderflowSmall}) {
+      const auto withFlowBins = axis.ComputeLinearizedIndex(x);
+      EXPECT_EQ(withFlowBins.index, Bins);
+      EXPECT_TRUE(withFlowBins.valid);
+      const auto withoutFlowBins = axisNoFlowBins.ComputeLinearizedIndex(x);
+      EXPECT_EQ(withoutFlowBins.index, Bins);
+      EXPECT_FALSE(withoutFlowBins.valid);
+   }
+
+   // Normal bins: the center of bin i maps to index i, independent of the flow bins.
+   for (std::size_t i = 0; i < Bins; i++) {
+      const double center = i + 0.5;
+      const auto withFlowBins = axis.ComputeLinearizedIndex(center);
+      EXPECT_EQ(withFlowBins.index, i);
+      EXPECT_TRUE(withFlowBins.valid);
+      const auto withoutFlowBins = axisNoFlowBins.ComputeLinearizedIndex(center);
+      EXPECT_EQ(withoutFlowBins.index, i);
+      EXPECT_TRUE(withoutFlowBins.valid);
+   }
+
+   // Overflow (including NaN): the index is Bins + 1, and it is only valid if the flow bins are enabled.
+   static constexpr double PositiveInfinity = std::numeric_limits<double>::infinity();
+   static constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
+   static constexpr double OverflowLarge = static_cast<double>(Bins * 2);
+   static constexpr double OverflowSmall = Bins + 0.1;
+   for (double x : {PositiveInfinity, NaN, OverflowLarge, OverflowSmall}) {
+      const auto withFlowBins = axis.ComputeLinearizedIndex(x);
+      EXPECT_EQ(withFlowBins.index, Bins + 1);
+      EXPECT_TRUE(withFlowBins.valid);
+      const auto withoutFlowBins = axisNoFlowBins.ComputeLinearizedIndex(x);
+      EXPECT_EQ(withoutFlowBins.index, Bins + 1);
+      EXPECT_FALSE(withoutFlowBins.valid);
+   }
+}

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`ROOT_ADD_GTEST(hist_regular hist_regular.cxx LIBRARIES ROOTHist)`
	`2`	`+ROOT_ADD_GTEST(hist_variable hist_variable.cxx LIBRARIES ROOTHist)`