From 9e20bbc4e2acc3b82df82134c37fac2c55e48f69 Mon Sep 17 00:00:00 2001 From: radhakrishnatg Date: Tue, 10 Jun 2025 11:01:57 -0400 Subject: [PATCH 1/9] Preliminary implementation --- pySTAR/__init__.py | 0 pySTAR/bigm_operators.py | 346 ++++++++++++++++ pySTAR/custom_block/LICENSE.md | 39 ++ pySTAR/custom_block/__init__.py | 0 pySTAR/custom_block/custom_block.py | 145 +++++++ pySTAR/custom_block/tests/__init__.py | 0 .../custom_block/tests/test_custom_block.py | 148 +++++++ pySTAR/hull_operators.py | 386 ++++++++++++++++++ pySTAR/symbolic_regression.py | 239 +++++++++++ pySTAR/symbolic_regression_example.py | 64 +++ pySTAR/utils.py | 47 +++ 11 files changed, 1414 insertions(+) create mode 100644 pySTAR/__init__.py create mode 100644 pySTAR/bigm_operators.py create mode 100644 pySTAR/custom_block/LICENSE.md create mode 100644 pySTAR/custom_block/__init__.py create mode 100644 pySTAR/custom_block/custom_block.py create mode 100644 pySTAR/custom_block/tests/__init__.py create mode 100644 pySTAR/custom_block/tests/test_custom_block.py create mode 100644 pySTAR/hull_operators.py create mode 100644 pySTAR/symbolic_regression.py create mode 100644 pySTAR/symbolic_regression_example.py create mode 100644 pySTAR/utils.py diff --git a/pySTAR/__init__.py b/pySTAR/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pySTAR/bigm_operators.py b/pySTAR/bigm_operators.py new file mode 100644 index 0000000..59960a1 --- /dev/null +++ b/pySTAR/bigm_operators.py @@ -0,0 +1,346 @@ +""" +This module contains all the supported operator models +for symbolic regression. The operator constraints are implemented using +the big-m approach +""" + +import pyomo.environ as pyo +from pySTAR.custom_block.custom_block import BlockData, declare_custom_block + + +@declare_custom_block("BaseOperator") +class BaseOperatorData(BlockData): + """Operator template for big-m type constraints""" + + @property + def symbolic_regression_model(self): + """Returns a pointer to the symbolic regression model""" + # Model layout: SymbolicRegressionModel + # |___SampleBlock + # |___OperatorBlock + # Regression model is two levels up, so calling the + # parent_block method twice. The model is needed to access the + # operator binary variables. + return self.parent_block().parent_block() + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """ + Builds the operator-specific constraints + + Parameters + ---------- + val_node : Var + Variables corresponding to the value at a node. It is an + indexed Var, where the index denotes the node number. + + op_bin_var : dict + Dictionary containing the binary variables associated with + the operator. Here, the keys correspond to node numbers, and the + values contain the operator binary variable. + """ + raise NotImplementedError("Operator-specific constraints are not implemented") + + def construct_convex_relaxation(self): + """Constructs a convex relaxation of the operator-specific constraints""" + raise NotImplementedError() + + +@declare_custom_block("SumOperator") +class SumOperatorData(BaseOperatorData): + """Adds constraints for the addition operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = vub - 2 * vlb + return val_node[n] - (val_node[2 * n] + val_node[2 * n + 1]) <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = vlb - 2 * vub + return val_node[n] - (val_node[2 * n] + val_node[2 * n + 1]) >= bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + # Constraints are convex, so returning the block as it is + pass + + +@declare_custom_block("DiffOperator") +class DiffOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = 2 * vub - vlb + return val_node[n] - (val_node[2 * n] - val_node[2 * n + 1]) <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = 2 * vlb - vub + return val_node[n] - (val_node[2 * n] - val_node[2 * n + 1]) >= bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + # Constraints are convex, so returning the block as it is + pass + + +@declare_custom_block("MultOperator") +class MultOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + product_values = [ + pyo.value(vlb * vlb), + pyo.value(vlb * vub), + pyo.value(vub * vub), + ] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = vub - min(product_values) + return val_node[n] - (val_node[2 * n] * val_node[2 * n + 1]) <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = vlb - max(product_values) + return val_node[n] - (val_node[2 * n] * val_node[2 * n + 1]) >= bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("DivOperator") +class DivOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + eps = srm.eps_value + product_values = [ + pyo.value(vlb * vlb), + pyo.value(vlb * vub), + pyo.value(vub * vub), + ] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = max(product_values) - vlb + return (val_node[n] * val_node[2 * n + 1]) - val_node[2 * n] <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = min(product_values) - vub + return (val_node[n] * val_node[2 * n + 1]) - val_node[2 * n] >= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_modes_set) + def avoid_zero_constraint(_, n): + # Ensures that the denominator is away from zero + # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 + return op_bin_var[n] * val_node[2 * n + 1] ** 2 >= op_bin_var[n] - 1 + eps + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("SqrtOperator") +class SqrtOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = max(vlb**2, vub**2) - vlb + return val_node[n] ** 2 - val_node[2 * n + 1] <= bigm * (1 - op_bin_var[n]) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = -vub + return val_node[n] ** 2 - val_node[2 * n + 1] >= bigm * (1 - op_bin_var[n]) + + @self.Constraint(srm.non_terminal_modes_set) + def non_negativity_constraint(_, n): + # Ensures that the denominator is away from zero + # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 + return op_bin_var[n] * val_node[2 * n + 1] >= 0 + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("ExpOperator") +class ExpOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = vub + return val_node[n] - pyo.exp(val_node[2 * n + 1]) <= bigm * ( + 1 - op_bin_var[n] + ) + + # NOTE: Would this constrain the value of v[2*n+1]? + if vub >= 10: + _bigm = vlb - 1e5 + else: + _bigm = vlb - pyo.exp(vub) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + return val_node[n] - pyo.exp(val_node[2 * n + 1]) >= _bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("LogOperator") +class LogOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + eps = srm.eps_value + + # NOTE: Would this constrain the value of v[2*n+1]? + # Not certain about this logic. Need to doublecheck. + if vub - vlb >= 10: + _bigm = 1e5 + elif pyo.exp(vub - vlb) < -1e-5: + _bigm = -1e5 + else: + _bigm = pyo.exp(vub - vlb) + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + return pyo.exp(val_node[n]) - val_node[2 * n + 1] <= _bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = -vub + return pyo.exp(val_node[n]) - val_node[2 * n + 1] >= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_modes_set) + def avoid_zero_constraint(_, n): + # Ensures that the denominator is away from zero + # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 + return op_bin_var[n] * val_node[2 * n + 1] >= op_bin_var[n] - 1 + eps + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("SquareOperator") +class SquareOperatorData(BaseOperatorData): + """Template for difference operator""" + + def build(self, val_node: pyo.Var, op_bin_var: dict): + """Function containing operator-specific constraints""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + @self.Constraint(srm.non_terminal_nodes_set) + def upper_bound_constraint(_, n): + bigm = vub + return val_node[n] - val_node[2 * n + 1] ** 2 <= bigm * (1 - op_bin_var[n]) + + @self.Constraint(srm.non_terminal_nodes_set) + def lower_bound_constraint(_, n): + bigm = vlb - max(vlb**2, vub**2) + return val_node[n] - val_node[2 * n + 1] ** 2 >= bigm * (1 - op_bin_var[n]) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +# pylint: disable = undefined-variable +BIGM_OPERATORS = { + "sum": SumOperator, + "diff": DiffOperator, + "mult": MultOperator, + "div": DivOperator, + "sqrt": SqrtOperator, + "log": LogOperator, + "exp": ExpOperator, + "square": SquareOperator, +} + + +@declare_custom_block("BigmSampleBlock") +class BigmSampleBlockData(BlockData): + """Class for evaluating the expression tree for each sample""" + + @property + def symbolic_regression_model(self): + """Returns a pointer to the symbolic regression model""" + return self.parent_block() + + # pylint: disable=attribute-defined-outside-init + def build(self): + """Builds the expression tree for a given sample""" + srm = self.symbolic_regression_model + self.val_node = pyo.Var( + srm.nodes_set, + bounds=(srm.var_bounds["lb"], srm.var_bounds["ub"]), + doc="Value at each node", + ) + + for op in srm.operators_set: + # Get binary variables corresponding to the opeartor + # and save them in a dictionary. Here, the keys are node + # numbers and the values are Var objects. + op_bin_vars = dict(srm.select_operator[:, op].wildcard_items()) + setattr( + self, + op + "_operator", + BIGM_OPERATORS[op]( + model_options={ + "val_node": self.val_node, + "op_bin_var": op_bin_vars, + } + ), + ) diff --git a/pySTAR/custom_block/LICENSE.md b/pySTAR/custom_block/LICENSE.md new file mode 100644 index 0000000..c9c8b5d --- /dev/null +++ b/pySTAR/custom_block/LICENSE.md @@ -0,0 +1,39 @@ +The source code in this folder is taken from the Pyomo repository (https://github.com/Pyomo/pyomo), +and modified as per the needs of PRIMO. See `pyomo/core/base/block.py` for the original source code. + +LICENSE +======= + +Copyright (c) 2008-2025 National Technology and Engineering Solutions of +Sandia, LLC . Under the terms of Contract DE-NA0003525 with National +Technology and Engineering Solutions of Sandia, LLC , the U.S. +Government retains certain rights in this software. + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +* Neither the name of the Sandia National Laboratories nor the names of +its contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/pySTAR/custom_block/__init__.py b/pySTAR/custom_block/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pySTAR/custom_block/custom_block.py b/pySTAR/custom_block/custom_block.py new file mode 100644 index 0000000..85f0de0 --- /dev/null +++ b/pySTAR/custom_block/custom_block.py @@ -0,0 +1,145 @@ +# ___________________________________________________________________________ +# +# Pyomo: Python Optimization Modeling Objects +# Copyright (c) 2008-2025 +# National Technology and Engineering Solutions of Sandia, LLC +# Under the terms of Contract DE-NA0003525 with National Technology and +# Engineering Solutions of Sandia, LLC, the U.S. Government retains certain +# rights in this software. +# This software is distributed under the 3-clause BSD License. +# ___________________________________________________________________________ + + +""" +This module modifies the declare_custom_block decorator available in +pyomo.core.base.block to set a default rule argument. +""" + +# Standard libs +import sys + +# Installed libs +# pylint: disable = unused-import +# Although BlockData is not needed for this file, we are importing it so that +# both BlockData and declare_custom_block can be imported from this file when needed. +# Otherwise, BlockData needs to be imported separately from Pyomo. +from pyomo.core.base.block import ( + Block, + BlockData, + ScalarCustomBlockMixin, + UnindexedComponent_set, +) + + +def _default_rule(model_options): + """ + Default rule for custom blocks + + Parameters + ---------- + model_options : dict + Dictionary of options needed to construct the block model + """ + + def _rule(blk, *args): + try: + # Attempt to build the model + blk.build(*args, **model_options) + + except AttributeError: + # build method is not implemented in the BlockData class + # Returning an empty Pyomo Block + pass + + return _rule + + +# pylint: disable = protected-access, no-member +class CustomBlock(Block): + """The base class used by instances of custom block components""" + + def __init__(self, *args, **kwargs): + config = kwargs.pop("model_options", {}) + kwargs.setdefault("rule", _default_rule(config)) + + if self._default_ctype is not None: + kwargs.setdefault("ctype", self._default_ctype) + Block.__init__(self, *args, **kwargs) + + def __new__(cls, *args, **kwargs): + if cls.__bases__[0] is not CustomBlock: + # we are creating a class other than the "generic" derived + # custom block class. We can assume that the routing of the + # generic block class to the specific Scalar or Indexed + # subclass has already occurred and we can pass control up + # to (toward) object.__new__() + return super().__new__(cls, *args, **kwargs) + # If the first base class is this CustomBlock class, then the + # user is attempting to create the "generic" block class. + # Depending on the arguments, we need to map this to either the + # Scalar or Indexed block subclass. + if not args or (args[0] is UnindexedComponent_set and len(args) == 1): + return super().__new__(cls._scalar_custom_block, *args, **kwargs) + else: + return super().__new__(cls._indexed_custom_block, *args, **kwargs) + + +def declare_custom_block(name): + """Decorator to declare components for a custom block data class + + >>> @declare_custom_block(name="FooBlock") + ... class FooBlockData(BlockData): + ... # custom block data class + ... def build(self, *args, **kwargs): # Default rule argument + ... pass + """ + + def block_data_decorator(block_data): + # this is the decorator function that creates the block + # component classes + + # Declare the new Block component (derived from CustomBlock) + # corresponding to the BlockData that we are decorating + # + # Note the use of `type(CustomBlock)` to pick up the metaclass + # that was used to create the CustomBlock (in general, it should + # be `type`) + comp = type(CustomBlock)( + name, # name of new class + (CustomBlock,), # base classes + # class body definitions (populate the new class' __dict__) + { + # ensure the created class is associated with the calling module + "__module__": block_data.__module__, + # Default IndexedComponent data object is the decorated class: + "_ComponentDataClass": block_data, + # By default this new block does not declare a new ctype + "_default_ctype": None, + }, + ) + + # Declare Indexed and Scalar versions of the custom block. We + # will register them both with the calling module scope, and + # with the CustomBlock (so that CustomBlock.__new__ can route + # the object creation to the correct class) + comp._indexed_custom_block = type(comp)( + "Indexed" + name, + (comp,), + { # ensure the created class is associated with the calling module + "__module__": block_data.__module__ + }, + ) + comp._scalar_custom_block = type(comp)( + "Scalar" + name, + (ScalarCustomBlockMixin, block_data, comp), + { # ensure the created class is associated with the calling module + "__module__": block_data.__module__ + }, + ) + + # Register the new Block types in the same module as the BlockData + for _cls in (comp, comp._indexed_custom_block, comp._scalar_custom_block): + setattr(sys.modules[block_data.__module__], _cls.__name__, _cls) + return block_data + + return block_data_decorator diff --git a/pySTAR/custom_block/tests/__init__.py b/pySTAR/custom_block/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pySTAR/custom_block/tests/test_custom_block.py b/pySTAR/custom_block/tests/test_custom_block.py new file mode 100644 index 0000000..e963360 --- /dev/null +++ b/pySTAR/custom_block/tests/test_custom_block.py @@ -0,0 +1,148 @@ +################################################################################# +# PRIMO - The P&A Project Optimizer was produced under the Methane Emissions +# Reduction Program (MERP) and National Energy Technology Laboratory's (NETL) +# National Emissions Reduction Initiative (NEMRI). +# +# NOTICE. This Software was developed under funding from the U.S. Government +# and the U.S. Government consequently retains certain rights. As such, the +# U.S. Government has been granted for itself and others acting on its behalf +# a paid-up, nonexclusive, irrevocable, worldwide license in the Software to +# reproduce, distribute copies to the public, prepare derivative works, and +# perform publicly and display publicly, and to permit others to do so. +################################################################################# + +""" +This module modifies the declare_custom_block decorator available in +pyomo.core.base.block to set a default rule argument. +""" + +# Installed libs +import pyomo.environ as pyo +import pytest + +# User-defined libs +from primo.custom_block.custom_block import BlockData, declare_custom_block + + +# pylint: disable = undefined-variable, attribute-defined-outside-init +# pylint: disable = missing-function-docstring +def test_custom_block_1(): + """Tests the decorator without the `build` method""" + + @declare_custom_block("FooBlock") + class FooBlockData(BlockData): + """Construct an empty custom block""" + + m = pyo.ConcreteModel() + m.blk_without_index = FooBlock() + m.blk_with_index = FooBlock([1, 2, 3, 4]) + + assert isinstance(m.blk_without_index, FooBlockData) + for p in m.blk_with_index: + assert isinstance(m.blk_with_index[p], FooBlockData) + assert len(m.blk_with_index) == 4 + + +def test_custom_block_2(): + """Tests the decorator with the `build` method, but without options""" + + @declare_custom_block("FooBlock") + class FooBlockData(BlockData): + """A dummy custom block""" + + def build(self, *args): + self.x = pyo.Var(list(args)) + self.y = pyo.Var() + + m = pyo.ConcreteModel() + m.blk_without_index = FooBlock() + m.blk_1 = FooBlock([1, 2, 3]) + m.blk_2 = FooBlock([4, 5], [6, 7]) + + assert isinstance(m.blk_without_index, FooBlockData) + for p in m.blk_1: + assert isinstance(m.blk_1[p], FooBlockData) + + for p in m.blk_2: + assert isinstance(m.blk_2[p], FooBlockData) + + assert hasattr(m.blk_without_index, "x") + assert hasattr(m.blk_without_index, "y") + + assert len(m.blk_1) == 3 + assert len(m.blk_2) == 4 + + assert len(m.blk_1[2].x) == 1 + assert len(m.blk_2[4, 6].x) == 2 + + +def test_custom_block_3(): + """Tests the decorator with the `build` method and model options""" + + @declare_custom_block("FooBlock") + class FooBlockData(BlockData): + """A dummy custom block""" + + def build(self, *args, capex, opex): + self.x = pyo.Var(list(args)) + self.y = pyo.Var() + + self.capex = capex + self.opex = opex + + options = {"capex": 42, "opex": 24} + + m = pyo.ConcreteModel() + m.blk_without_index = FooBlock(model_options=options) + m.blk_1 = FooBlock([1, 2, 3], model_options=options) + m.blk_2 = FooBlock([4, 5], [6, 7], model_options=options) + + assert isinstance(m.blk_without_index, FooBlockData) + for p in m.blk_1: + assert isinstance(m.blk_1[p], FooBlockData) + + for p in m.blk_2: + assert isinstance(m.blk_2[p], FooBlockData) + + assert hasattr(m.blk_without_index, "x") + assert hasattr(m.blk_without_index, "y") + assert m.blk_without_index.capex == 42 + assert m.blk_without_index.opex == 24 + + assert len(m.blk_1) == 3 + assert len(m.blk_2) == 4 + + assert len(m.blk_1[2].x) == 1 + assert len(m.blk_2[4, 6].x) == 2 + + assert m.blk_1[3].capex == 42 + assert m.blk_2[4, 7].opex == 24 + + with pytest.raises(TypeError): + # missing 2 required keyword-only arguments + m.blk_3 = FooBlock() + + +def test_custom_block_4(): + """Tests if the default rule can be overwritten""" + + @declare_custom_block("FooBlock") + class FooBlockData(BlockData): + """A dummy custom block""" + + def build(self, *args): + self.x = pyo.Var(list(args)) + self.y = pyo.Var() + + def _new_rule(blk): + blk.a = pyo.Var() + blk.b = pyo.Var() + + m = pyo.ConcreteModel() + m.blk = FooBlock(rule=_new_rule) + + assert isinstance(m.blk, FooBlockData) + assert not hasattr(m.blk, "x") + assert not hasattr(m.blk, "y") + assert isinstance(m.blk.a, pyo.Var) + assert isinstance(m.blk.b, pyo.Var) diff --git a/pySTAR/hull_operators.py b/pySTAR/hull_operators.py new file mode 100644 index 0000000..f8c9449 --- /dev/null +++ b/pySTAR/hull_operators.py @@ -0,0 +1,386 @@ +""" +This module contains all the supported operator models +for symbolic regression. +""" + +import logging +from pyomo.core.base.block import BlockData +from pyomo.environ import Var, Constraint +import pyomo.environ as pyo + +from pySTAR.operators.custom_block import declare_custom_block +from pySTAR.operators.relaxation import mccormick_relaxation + +LOGGER = logging.getLogger(__name__) + + +@declare_custom_block("BaseOperator") +class BaseOperatorData(BlockData): + """Base class for defining operator models""" + + _operator_name = "base_operator" + _is_unary_operator = False + _define_aux_var_right = False + + @property + def regression_model(self): + """ + Returns the symbolic regression model + + Model structure: + main_model -> Sample block -> Node block -> Operator block + So, calling the parent_block method thrice returns the + SymbolicRegressionModel object. + """ + return self._bin_var_ref.parent_block() + + @property + def operator_binary(self): + """Returns the binary variable associated with the operator""" + return self._bin_var_ref + + # pylint: disable = attribute-defined-outside-init + def build(self, bin_var: Var): + """rule for constructing operator blocks""" + srm = bin_var.parent_block() # Symbolic regression model + lb = srm.var_bounds["lb"] + ub = srm.var_bounds["ub"] + eps = srm.eps_value + + # Declare auxiliary variables + self._bin_var_ref = bin_var # Store a reference to the binary var + self.val_node = Var(doc="Node value", bounds=(lb, ub)) + self.val_left_node = Var(doc="Left child value", bounds=(lb, ub)) + self.val_right_node = Var(doc="Right child value", bounds=(lb, ub)) + + if self._is_unary_operator: + # For unary operators, left node must not be chosen + self.val_left_node.fix(0) + + if self._define_aux_var_right: + # Declare an auxiliary variable for operators with singularity issue + self.aux_var_right = Var( + doc="Auxiliary variable for the right child", + bounds=(eps, ub), # Ensure that the RHS is strictly positive + ) + + # Define val_right_node = aux_var_right * binary_var + self.calculate_val_right_node = Constraint( + expr=self.val_right_node == bin_var * self.aux_var_right + ) + + # Add operator-specific constraints + try: + self.build_operator_model() + except NotImplementedError: + LOGGER.warning("Operator-specific model constraints are not implemented.") + + def build_operator_model(self): + """Adds operator-specific constraints""" + raise NotImplementedError("Operator-specific model is not implemented") + + # pylint: disable = no-member, attribute-defined-outside-init + def add_bound_constraints(self, val_left_node=True, val_right_node=True): + """Adds bound constraints on val_node variable""" + bin_var = self._bin_var_ref + + self.node_val_lb_con = Constraint( + expr=self.val_node.lb * bin_var <= self.val_node + ) + self.node_val_ub_con = Constraint( + expr=self.val_node <= self.val_node.ub * bin_var + ) + + if val_left_node: + self.left_node_val_lb_con = Constraint( + expr=self.val_left_node.lb * bin_var <= self.val_left_node + ) + self.left_node_val_ub_con = Constraint( + expr=self.val_left_node <= self.val_left_node.ub * bin_var + ) + + if val_right_node: + self.right_node_val_lb_con = Constraint( + expr=self.val_right_node.lb * bin_var <= self.val_right_node + ) + self.right_node_val_ub_con = Constraint( + expr=self.val_right_node <= self.val_right_node.ub * bin_var + ) + + def construct_convex_relaxation(self): + """Constructs a convex relaxation of the model""" + raise NotImplementedError("Convex relaxation is not supported") + + +# pylint: disable = attribute-defined-outside-init, missing-class-docstring +@declare_custom_block("SumOperator") +class SumOperatorData(BaseOperatorData): + _operator_name = "sum" + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + self.evaluate_val_node = Constraint( + expr=self.val_node == self.val_left_node + self.val_right_node + ) + self.add_bound_constraints() + + def construct_convex_relaxation(self): + # Operator model is convex by default, so return. + pass + + +@declare_custom_block("DiffOperator") +class DiffOperatorData(BaseOperatorData): + _operator_name = "diff" + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + self.evaluate_val_node = Constraint( + expr=self.val_node == self.val_left_node - self.val_right_node + ) + self.add_bound_constraints() + + def construct_convex_relaxation(self): + # Operator model is convex by default, so return. + pass + + +@declare_custom_block("MultOperator") +class MultOperatorData(BaseOperatorData): + _operator_name = "mult" + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + self.evaluate_val_node = Constraint( + expr=self.val_node == self.val_left_node * self.val_right_node + ) + self.add_bound_constraints() + + def construct_convex_relaxation(self): + self.evaluate_val_node.deactivate() + mccormick_relaxation( + self, z=self.val_node, x=self.val_left_node, y=self.val_right_node + ) + + +@declare_custom_block("DivOperator") +class DivOperatorData(BaseOperatorData): + _operator_name = "div" + _define_aux_var_right = True + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + self.evaluate_val_node = Constraint( + expr=self.val_node * self.aux_var_right == self.val_left_node + ) + self.add_bound_constraints(val_right_node=False) + + def construct_convex_relaxation(self): + self.evaluate_val_node.deactivate() + self.calculate_val_right_node.deactivate() + mccormick_relaxation( + self, z=self.val_left_node, x=self.val_node, y=self.aux_var_right + ) + mccormick_relaxation( + self, + z=self.val_right_node, + x=self.get_operator_binary_var(), + y=self.aux_var_right, + ) + + +# pylint: disable = no-member +@declare_custom_block("SquareOperator") +class SquareOperatorData(BaseOperatorData): + _operator_name = "square" + _is_unary_operator = True + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + # For unary operator, left node is fixed to zero. + self.add_bound_constraints(val_left_node=False) + + # Evaluate the value at the node + self.evaluate_val_node = Constraint( + expr=self.val_node == self.val_right_node * self.val_right_node + ) + + def construct_convex_relaxation(self): + # Need to implement outer-approximation + # Update bounds on val_node and val_right_node variables + ub_val_node = self.val_node.ub + ub_val_right_node = self.val_right_node.ub + lb_val_right_node = self.val_right_node.lb + + self.val_node.setub(min(ub_val_node, ub_val_right_node**2)) + self.val_right_node.setub(min(pyo.sqrt(ub_val_node), ub_val_right_node)) + self.val_node.setlb(0) + self.val_right_node.setlb(max(-pyo.sqrt(ub_val_node), lb_val_right_node)) + + raise NotImplementedError() + + +@declare_custom_block("SqrtOperator") +class SqrtOperatorData(BaseOperatorData): + _operator_name = "sqrt" + _is_unary_operator = True + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + self.add_bound_constraints(val_left_node=False) + + # Expressing the constraint in this manner makes it a + # quadratic constraint, instead of a general nonlinear constraint + self.evaluate_val_node = Constraint( + expr=self.val_node * self.val_node == self.val_right_node + ) + + def construct_convex_relaxation(self): + # Make the domain of the variable non-negative + ub_val_node = self.val_node.ub + ub_val_right_node = self.val_right_node.ub + + self.val_node.setub(min(ub_val_node, pyo.sqrt(ub_val_right_node))) + self.val_right_node.setub(min(ub_val_node**2, ub_val_right_node)) + self.val_right_node.setlb(0) + self.val_node.setlb(0) + + self.del_component(self.node_val_lb_con) + self.del_component(self.right_node_val_lb_con) + + raise NotImplementedError() + + +@declare_custom_block("ExpOperator") +class ExpOperatorData(BaseOperatorData): + _operator_name = "exp" + _is_unary_operator = True + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + # Update the domain of variables + ub_val_node = self.val_node.ub + ub_val_right_node = self.val_right_node.ub + + self.val_node.setub(min(ub_val_node, pyo.exp(ub_val_right_node))) + self.val_right_node.setub(min(pyo.log(ub_val_node), ub_val_right_node)) + self.val_node.setlb(0) + + # To avoid numerical issues, we do not let the lower bound of the + # argument of the exp function go below -10 + if self.val_right_node.lb < -10: + self.val_right_node.setlb(-10) + + self.add_bound_constraints(val_left_node=False) + self.del_component(self.node_val_lb_con) + + # Exponential term does not become zero, so multiply it + # with the operator binary variable to make it zero + self.evaluate_val_node = Constraint( + expr=self.val_node == self._bin_var_ref * pyo.exp(self.val_right_node) + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +@declare_custom_block("LogOperator") +class LogOperatorData(BaseOperatorData): + _operator_name = "log" + _is_unary_operator = True + _define_aux_var_right = True + + def build_operator_model(self): + """Constructs constraint that evaluates the value at the node""" + # Update bounds on variables + ub_val_node = self.val_node.ub + ub_val_right_node = self.aux_var_right.ub + + self.val_node.setub(min(ub_val_node, pyo.log(ub_val_right_node))) + self.aux_var_right.setub(min(pyo.exp(ub_val_node), ub_val_right_node)) + self.val_right_node.setub(self.aux_var_right.ub) + + # Log term need not vanish when the operator is not selected, so + # multiply it with the operator binary variable to make it zero + bin_var = self._bin_var_ref + self.evaluate_val_node = Constraint( + expr=self.val_node == bin_var * pyo.log(self.aux_var_right) + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + +OPERATOR_MODELS = { + "sum": SumOperator, + "diff": DiffOperator, + "mult": MultOperator, + "div": DivOperator, + "square": SquareOperator, + "sqrt": SqrtOperator, + "log": LogOperator, + "exp": ExpOperator, +} + + +@declare_custom_block("SampleBlock") +class SampleBlockData(BlockData): + def build(self, s): + """rule for building the expression model for each sample""" + pb = self.parent_block() + input_data = pb.input_data_ref.loc[s] + output_value = pb.output_data_ref.loc[s] + + @self.Block(pb.nodes_set) + def node(blk, n): + # Declare a variable to track the value at this node + blk.val_node = Var( + doc="Value at the node", + bounds=(pb.var_bounds["lb"], pb.var_bounds["ub"]), + ) + + # Defining an expression for the val_node variable arising from operands + blk.value_from_operands = pyo.Expression( + expr=sum( + pb.select_operator[n, op] * input_data[op] + for op in input_data.index + ) + + pb.constant_val[n] + ) + + # Define a constraint to evaluate the value from all disjuncts + if n in pb.terminal_nodes_set: + # For terminal nodes, val_node = value from the operands chosen + blk.evaluate_val_node_var = Constraint( + expr=blk.val_node == blk.value_from_operands + ) + + for n in pb.non_terminal_nodes_set: + op_blocks = [] + for op in pb.operators_set: + # Build operator blocks for all unary and binary operators + setattr( + self.node[n], + "op_" + op, + OPERATOR_MODELS[op](config={"bin_var": pb.select_operator[n, op]}), + ) + op_blocks.append(getattr(self.node[n], "op_" + op)) + + self.node[n].evaluate_val_node_var = Constraint( + expr=self.node[n].val_node + == sum(blk.val_node for blk in op_blocks) + + self.node[n].value_from_operands + ) + + self.node[n].evaluate_val_left_node = Constraint( + expr=self.node[2 * n].val_node + == sum(blk.val_left_node for blk in op_blocks) + ) + self.node[n].evaluate_val_right_node = Constraint( + expr=self.node[2 * n + 1].val_node + == sum(blk.val_right_node for blk in op_blocks) + ) + + self.square_of_error = pyo.Expression( + expr=(self.node[1].val_node - output_value) ** 2 + ) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py new file mode 100644 index 0000000..c6d98d9 --- /dev/null +++ b/pySTAR/symbolic_regression.py @@ -0,0 +1,239 @@ +from typing import Optional +import pandas as pd +import pyomo.environ as pyo +from pyomo.environ import Var, Constraint +from pySTAR.operators.operators import BaseOperatorData, SampleBlock + + +class SymbolicRegressionModel(pyo.ConcreteModel): + """Builds the symbolic regression model for a given data""" + + def __init__( + self, + *args, + data: pd.DataFrame, + input_columns: list, + output_column: str, + tree_depth: int, + unary_operators: Optional[list] = None, + binary_operators: Optional[list] = None, + var_bounds: tuple = (-100, 100), + constant_bounds: tuple = (-100, 100), + eps: float = 1e-4, + **kwds, + ): + super().__init__(*args, **kwds) + + if unary_operators is None: + # If not specified, use all supported unary operators + unary_operators = ["square", "sqrt", "exp", "log"] + + if binary_operators is None: + # If not specified, use all supported binary operators + binary_operators = ["sum", "diff", "mult", "div"] + + # Save a reference to the input and output data + self.input_data_ref = data[input_columns] + self.output_data_ref = data[output_column] + self.var_bounds = pyo.Param( + ["lb", "ub"], + initialize={"lb": var_bounds[0], "ub": var_bounds[1]}, + ) + self.constant_bounds = pyo.Param( + ["lb", "ub"], + initialize={"lb": constant_bounds[0], "ub": constant_bounds[1]}, + mutable=True, + ) + self.eps_val = pyo.Param(initialize=eps, mutable=True, domain=pyo.PositiveReals) + self.min_tree_size = pyo.Param( + initialize=1, + mutable=True, + within=pyo.PositiveIntegers, + doc="Minimum number of active nodes in the tree", + ) + self.max_tree_size = pyo.Param( + initialize=2**tree_depth, + mutable=True, + within=pyo.PositiveIntegers, + doc="Maximum number of active nodes in the tree", + ) + + # Define model sets + # pylint: disable = no-member + self.binary_operators_set = pyo.Set(initialize=binary_operators) + self.unary_operators_set = pyo.Set(initialize=unary_operators) + self.operators_set = self.binary_operators_set.union(self.unary_operators_set) + self.operands_set = pyo.Set(initialize=input_columns + ["cst"]) + self.collective_operator_set = self.operators_set.union(self.operands_set) + + self.non_terminal_nodes_set = pyo.RangeSet(2 ** (tree_depth - 1) - 1) + self.terminal_nodes_set = pyo.RangeSet(2 ** (tree_depth - 1), 2**tree_depth - 1) + self.nodes_set = self.non_terminal_nodes_set.union(self.terminal_nodes_set) + + # Build the expression model + self._build_expression_tree_model() + + # Calculate the output value for each sample + self.samples = SampleBlock(self.input_data_ref.index.to_list()) + + def _build_expression_tree_model(self): + """Builds constraints that return a valid expression tree""" + # Define variables + # pylint: disable = attribute-defined-outside-init, not-an-iterable + self.select_node = Var( + self.nodes_set, + domain=pyo.Binary, + doc="If 1, an operator/operator is assigned to the node", + ) + self.select_operator = Var( + self.nodes_set, + self.collective_operator_set, + domain=pyo.Binary, + doc="Binary variable associated with the operator at each node", + ) + self.constant_val = Var(self.nodes_set, doc="Constant values in the expression") + + # Binary and unary operators must not be chosen for terminal nodes + for n in self.terminal_nodes_set: + for op in self.operators_set: + self.select_operator[n, op].fix(0) + + # Constant must only be present in the left node (i.e., even-numbered nodes) + for n in self.nodes_set: + if n % 2 == 1: + self.select_operator[n, "cst"].fix(0) + + # Begin constructing essential constraints + # If a node is active, then either an operator or an operand + # must be chosen + @self.Constraint(self.nodes_set) + def active_node_active_operator(blk, n): + return blk.select_node[n] == sum( + blk.select_operator[n, op] for op in self.collective_operator_set + ) + + # If a binary or unary operator is chosen, then right child must be present + @self.Constraint(self.non_terminal_nodes_set) + def right_child_presence(blk, n): + return blk.select_node[2 * n + 1] == sum( + blk.select_operator[n, op] for op in self.operators_set + ) + + # If a binary operator is chosen, then left child must be present + @self.Constraint(self.non_terminal_nodes_set) + def left_child_presence(blk, n): + return blk.select_node[2 * n] == sum( + blk.select_operator[n, op] for op in self.binary_operators_set + ) + + # Set constant value to zero, if constant operator is not selected + @self.Constraint(self.nodes_set) + def constant_lb_con(blk, n): + return ( + blk.constant_bounds["lb"] * blk.select_operator[n, "cst"] + <= blk.constant_val[n] + ) + + @self.Constraint(self.nodes_set) + def constant_ub_con(blk, n): + return ( + blk.constant_val[n] + <= blk.constant_bounds["ub"] * blk.select_operator[n, "cst"] + ) + + # pylint: disable = attribute-defined-outside-init + def add_objective(self, objective_type="sse"): + """Appends objective function to the model + + Parameters + ---------- + objective_type : str, optional + Choice of objective function. Supported objective functions are + Sum of squares of errors: "sse" + Bayesian Information Criterion: "bic", by default "sse" + """ + if objective_type == "sse": + # build SSR objective + self.sse = pyo.Objective(expr=sum(self.samples[:].square_of_error)) + + elif objective_type == "bic": + # Build BIC objective + pass + + else: + raise ValueError( + f"Specified objective_type: {objective_type} is not supported." + ) + + def add_redundancy_cuts(self): + """Adds cuts to eliminate redundance from the model""" + + # Do not choose the same operand for both the child nodes. + # This cut avoids expressions of type: x + x, x - x, x * x, x / x. + @self.Constraint(self.non_terminal_nodes_set, self.operands_set) + def redundant_bin_op_same_operand(blk, n, op): + if op == "cst": + # Both child nodes cannot take a constant, so skip this case + return Constraint.Skip + + return ( + blk.select_operator[2 * n, op] + blk.select_operator[2 * n + 1, op] + <= blk.select_node[n] + ) + + # Remove Associative operator combinations + + def add_symmetry_breaking_cuts(self, sample_name=None): + """Adds cuts to eliminate symmetrical trees from the model""" + if sample_name is None: + # If the sample is not specified, apply symmetry breaking + # cuts to the first sample + sample_name = self.input_data_ref.iloc[0].name + + blk = self.samples[sample_name] + symmetric_operators = [ + op for op in ["sum", "mult"] if op in self.binary_operators_set + ] + + def add_implication_cuts(self): + """ + Adds cuts to avoid a certain combinations of operators based on the input data + """ + + def add_tree_size_constraint(self, size: int): + """Adds a constraint to constrain the size of the tree""" + self.max_tree_size = size + + # If the constraint exists, then updating the parameter value is sufficient + # pylint: disable = attribute-defined-outside-init + if not hasattr(self, "tree_size_con"): + self.tree_size_constraint = Constraint( + expr=sum(self.select_node[n] for n in self.nodes_set) + <= self.max_tree_size + ) + + def relax_integrality_constraints(self): + """Relaxes integrality requirement on all binary variables""" + self.select_node.domain = pyo.UnitInterval + self.select_operator.domain = pyo.UnitInterval + + def add_integrality_constraints(self): + """Adds integrality requirement on node and operator variables""" + self.select_node.domain = pyo.Binary + self.select_operator.domain = pyo.Binary + + def relax_nonconvex_constraints(self): + """Convexifies all non-convex nonlinear constraints""" + for blk in self.component_data_objects(pyo.Block): + if isinstance(blk, BaseOperatorData): + blk.construct_convex_relaxation() + + def get_selected_operators(self): + """ + Returns the list of selected nodes along with the selected + operator at that node + """ + return [op for op, val in self.select_operator.items() if val.value > 0.95] + + def selected_tree_to_expression(self): + """Returns the optimal expression as a string""" diff --git a/pySTAR/symbolic_regression_example.py b/pySTAR/symbolic_regression_example.py new file mode 100644 index 0000000..e4a7dce --- /dev/null +++ b/pySTAR/symbolic_regression_example.py @@ -0,0 +1,64 @@ +from symbolic_regression import SymbolicRegressionModel +import numpy as np +import pandas as pd +import pyomo.environ as pyo + +np.random.seed(42) +data = pd.DataFrame(np.random.uniform(low=1, high=3, size=(10, 2)), columns=["x1", "x2"]) +data["y"] = 2.5 * np.exp(data["x1"]) * np.log(data["x2"]) +in_cols = ["x1", "x2"] +out_cols = "y" +print(data) + +# data = pd.read_csv("3_4_simulation_data_10.csv") +# in_cols = ["liquid_inlet_conc_mol_comp_H2SO4", "solid_inlet_flow_mass"] +# out_cols = "liquid_outlet_flow_vol" + + +def build_model(): + m = SymbolicRegressionModel( + data=data, + input_columns=in_cols, + output_column=out_cols, + tree_depth=4, + unary_operators=["log", "exp"], + binary_operators=["sum", "diff", "mult"], + var_bounds=(-100, 100), + constant_bounds=(-100, 100), + ) + + return m + + +if __name__ == "__main__": + solver = "baron" + mdl = build_model() + mdl.add_redundancy_cuts() + # mdl.add_tree_size_constraint(3) + + if solver == "scip": + solver = pyo.SolverFactory("scip") + solver.solve(mdl, tee=True) + + elif solver == "gurobi": + mdl.solve_with_gurobi() + + elif solver == "baron": + solver = pyo.SolverFactory("gams") + solver.solve(mdl, solver="baron", tee=True) + + print(mdl.get_selected_operators()) + mdl.constant_val.pprint() + + results = pd.DataFrame() + results["sim_data"] = data[out_cols] + results["prediction"] = [ + mdl.samples[s].node[1].val_node.value + for s in mdl.samples + ] + results["square_of_error"] = [ + mdl.samples[s].square_of_error.expr() + for s in mdl.samples + ] + + print(results) diff --git a/pySTAR/utils.py b/pySTAR/utils.py new file mode 100644 index 0000000..ee1a79c --- /dev/null +++ b/pySTAR/utils.py @@ -0,0 +1,47 @@ +from gurobipy import nlfunc +import pyomo.environ as pyo +from pySTAR.operators.operators import ( + LogOperatorData, + ExpOperatorData, +) + + +def solve_with_gurobi(m): + """Uses Gurobibpy interface to solve the MINLP""" + + for blk in m.component_data_objects(pyo.Block): + if isinstance(blk, (LogOperatorData, ExpOperatorData)): + # Deactivate the nonlinear constraint + blk.evaluate_val_node.deactivate() + + # pylint: disable = protected-access + grb = pyo.SolverFactory("gurobi_persistent") + grb.set_instance(m) + gm = grb._solver_model + pm_to_gm = grb._pyomo_var_to_solver_var_map + + for blk in m.component_data_objects(pyo.Block): + if isinstance(blk, LogOperatorData): + # Add the nonlinear constraint + gm.addConstr( + pm_to_gm[blk.val_node] + == pm_to_gm[blk.operator_binary] + * nlfunc.log(pm_to_gm[blk.aux_var_right]) + ) + + elif isinstance(blk, ExpOperatorData): + # Add the nonlinear constraint + gm.addConstr( + pm_to_gm[blk.val_node] + == pm_to_gm[blk.operator_binary] + * nlfunc.exp(pm_to_gm[blk.val_right_node]) + ) + + # Solve the optimization model + grb.solve(tee=True) + + # Activate the constraint back + for blk in m.component_data_objects(pyo.Block): + if isinstance(blk, (LogOperatorData, ExpOperatorData)): + # Deactivate the nonlinear constraint + blk.evaluate_val_node.activate() From cc14a5099dd388593a6456a5296a5eaaf71ea096 Mon Sep 17 00:00:00 2001 From: Radhakrishna Date: Mon, 7 Jul 2025 15:36:10 -0400 Subject: [PATCH 2/9] Updated bigm operator models --- pySTAR/bigm_operators.py | 134 ++++++++++++++++---------- pySTAR/custom_block/custom_block.py | 5 +- pySTAR/symbolic_regression.py | 109 +++++++++++++++++---- pySTAR/symbolic_regression_example.py | 33 +++---- pySTAR/utils.py | 57 ++++++++++- 5 files changed, 240 insertions(+), 98 deletions(-) diff --git a/pySTAR/bigm_operators.py b/pySTAR/bigm_operators.py index 59960a1..438dfb3 100644 --- a/pySTAR/bigm_operators.py +++ b/pySTAR/bigm_operators.py @@ -5,10 +5,12 @@ """ import pyomo.environ as pyo -from pySTAR.custom_block.custom_block import BlockData, declare_custom_block +from custom_block.custom_block import BlockData, declare_custom_block +OPTION_NAMES = ["val_node", "op_bin_var"] -@declare_custom_block("BaseOperator") + +@declare_custom_block("BaseOperator", model_options=OPTION_NAMES) class BaseOperatorData(BlockData): """Operator template for big-m type constraints""" @@ -23,7 +25,7 @@ def symbolic_regression_model(self): # operator binary variables. return self.parent_block().parent_block() - def build(self, val_node: pyo.Var, op_bin_var: dict): + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): """ Builds the operator-specific constraints @@ -45,11 +47,11 @@ def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("SumOperator") +@declare_custom_block("SumOperator", OPTION_NAMES) class SumOperatorData(BaseOperatorData): """Adds constraints for the addition operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] @@ -72,12 +74,11 @@ def construct_convex_relaxation(self): pass -@declare_custom_block("DiffOperator") +@declare_custom_block("DiffOperator", OPTION_NAMES) class DiffOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the difference operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] @@ -100,12 +101,11 @@ def construct_convex_relaxation(self): pass -@declare_custom_block("MultOperator") +@declare_custom_block("MultOperator", OPTION_NAMES) class MultOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the multiplication operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] product_values = [ @@ -132,12 +132,11 @@ def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("DivOperator") +@declare_custom_block("DivOperator", OPTION_NAMES) class DivOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the division operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] eps = srm.eps_value @@ -165,24 +164,23 @@ def lower_bound_constraint(_, n): def avoid_zero_constraint(_, n): # Ensures that the denominator is away from zero # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 - return op_bin_var[n] * val_node[2 * n + 1] ** 2 >= op_bin_var[n] - 1 + eps + return val_node[2 * n + 1] ** 2 >= op_bin_var[n] - 1 + eps def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("SqrtOperator") +@declare_custom_block("SqrtOperator", OPTION_NAMES) class SqrtOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the square root operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] @self.Constraint(srm.non_terminal_nodes_set) def upper_bound_constraint(_, n): - bigm = max(vlb**2, vub**2) - vlb + bigm = max(pyo.value(vlb * vlb), pyo.value(vub * vub)) - vlb return val_node[n] ** 2 - val_node[2 * n + 1] <= bigm * (1 - op_bin_var[n]) @self.Constraint(srm.non_terminal_nodes_set) @@ -192,20 +190,17 @@ def lower_bound_constraint(_, n): @self.Constraint(srm.non_terminal_modes_set) def non_negativity_constraint(_, n): - # Ensures that the denominator is away from zero - # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 - return op_bin_var[n] * val_node[2 * n + 1] >= 0 + return val_node[2 * n + 1] >= (1 - op_bin_var[n]) * vlb def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("ExpOperator") +@declare_custom_block("ExpOperator", OPTION_NAMES) class ExpOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the exponential operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] @@ -232,24 +227,20 @@ def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("LogOperator") +@declare_custom_block("LogOperator", OPTION_NAMES) class LogOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the logarithm operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] eps = srm.eps_value # NOTE: Would this constrain the value of v[2*n+1]? - # Not certain about this logic. Need to doublecheck. - if vub - vlb >= 10: - _bigm = 1e5 - elif pyo.exp(vub - vlb) < -1e-5: - _bigm = -1e5 + if vub >= 10: + _bigm = 1e5 - vlb else: - _bigm = pyo.exp(vub - vlb) + _bigm = pyo.exp(vub) - vlb @self.Constraint(srm.non_terminal_nodes_set) def upper_bound_constraint(_, n): @@ -274,12 +265,11 @@ def construct_convex_relaxation(self): raise NotImplementedError() -@declare_custom_block("SquareOperator") +@declare_custom_block("SquareOperator", OPTION_NAMES) class SquareOperatorData(BaseOperatorData): - """Template for difference operator""" + """Adds constraints for the square operator""" - def build(self, val_node: pyo.Var, op_bin_var: dict): - """Function containing operator-specific constraints""" + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): srm = self.symbolic_regression_model vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] @@ -290,7 +280,7 @@ def upper_bound_constraint(_, n): @self.Constraint(srm.non_terminal_nodes_set) def lower_bound_constraint(_, n): - bigm = vlb - max(vlb**2, vub**2) + bigm = vlb - max(pyo.value(vlb * vlb), pyo.value(vub * vub)) return val_node[n] - val_node[2 * n + 1] ** 2 >= bigm * (1 - op_bin_var[n]) def construct_convex_relaxation(self): @@ -320,7 +310,7 @@ def symbolic_regression_model(self): return self.parent_block() # pylint: disable=attribute-defined-outside-init - def build(self): + def build(self, *args): """Builds the expression tree for a given sample""" srm = self.symbolic_regression_model self.val_node = pyo.Var( @@ -329,6 +319,29 @@ def build(self): doc="Value at each node", ) + # Value defining constraints for operands + data = srm.input_data_ref.loc[args[0]] + + @self.Constraint(srm.nodes_set) + def value_upper_bound_constraint(blk, n): + return blk.val_node[n] <= srm.constant_val[n] + sum( + data[op] * srm.select_operator[n, op] + for op in srm.operands_set + if op != "cst" + ) + srm.var_bounds["ub"] * sum( + srm.select_operator[n, op] for op in srm.operators_set + ) + + @self.Constraint(srm.nodes_set) + def value_lower_bound_constraint(blk, n): + return blk.val_node[n] >= srm.constant_val[n] + sum( + data[op] * srm.select_operator[n, op] + for op in srm.operands_set + if op != "cst" + ) + srm.var_bounds["lb"] * sum( + srm.select_operator[n, op] for op in srm.operators_set + ) + for op in srm.operators_set: # Get binary variables corresponding to the opeartor # and save them in a dictionary. Here, the keys are node @@ -337,10 +350,27 @@ def build(self): setattr( self, op + "_operator", - BIGM_OPERATORS[op]( - model_options={ - "val_node": self.val_node, - "op_bin_var": op_bin_vars, - } - ), + BIGM_OPERATORS[op](val_node=self.val_node, op_bin_var=op_bin_vars), + ) + + self.residual = pyo.Var(doc="Residual value for the sample") + self.calculate_residual = pyo.Constraint( + expr=self.residual == srm.output_data_ref[self.index()] - self.val_node[1], + doc="Computes the residual between prediction and the data", + ) + self.square_of_residual = pyo.Expression(expr=self.residual**2) + + def add_symmetry_breaking_cuts(self): + """Adds symmetry breaking cuts to the sample""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + symmetric_operators = [ + op for op in ["sum", "mult"] if op in srm.binary_operators_set + ] + + @self.Constraint(srm.non_terminal_nodes_set) + def symmetry_breaking_constraints(blk, n): + return (blk.val_node[2 * n] - blk.val_node[2 * n + 1]) >= (vlb - vub) * ( + srm.select_node[n] + - sum(srm.select_operator[n, op] for op in symmetric_operators) ) diff --git a/pySTAR/custom_block/custom_block.py b/pySTAR/custom_block/custom_block.py index 85f0de0..8ffff52 100644 --- a/pySTAR/custom_block/custom_block.py +++ b/pySTAR/custom_block/custom_block.py @@ -59,7 +59,7 @@ class CustomBlock(Block): """The base class used by instances of custom block components""" def __init__(self, *args, **kwargs): - config = kwargs.pop("model_options", {}) + config = {key: kwargs.pop(key) for key in self._build_options if key in kwargs} kwargs.setdefault("rule", _default_rule(config)) if self._default_ctype is not None: @@ -84,7 +84,7 @@ def __new__(cls, *args, **kwargs): return super().__new__(cls._indexed_custom_block, *args, **kwargs) -def declare_custom_block(name): +def declare_custom_block(name, model_options=None): """Decorator to declare components for a custom block data class >>> @declare_custom_block(name="FooBlock") @@ -115,6 +115,7 @@ def block_data_decorator(block_data): "_ComponentDataClass": block_data, # By default this new block does not declare a new ctype "_default_ctype": None, + "_build_options": [] if model_options is None else model_options, }, ) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index c6d98d9..225f924 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -1,8 +1,13 @@ -from typing import Optional +from typing import List, Optional import pandas as pd import pyomo.environ as pyo from pyomo.environ import Var, Constraint -from pySTAR.operators.operators import BaseOperatorData, SampleBlock + +# pylint: disable = no-name-in-module +from bigm_operators import ( + BigmSampleBlock, + BaseOperatorData as BigmOperatorData, +) class SymbolicRegressionModel(pyo.ConcreteModel): @@ -12,7 +17,7 @@ def __init__( self, *args, data: pd.DataFrame, - input_columns: list, + input_columns: List[str], output_column: str, tree_depth: int, unary_operators: Optional[list] = None, @@ -20,6 +25,7 @@ def __init__( var_bounds: tuple = (-100, 100), constant_bounds: tuple = (-100, 100), eps: float = 1e-4, + model_type: str = "bigm", **kwds, ): super().__init__(*args, **kwds) @@ -33,7 +39,10 @@ def __init__( binary_operators = ["sum", "diff", "mult", "div"] # Save a reference to the input and output data - self.input_data_ref = data[input_columns] + _col_name_map = { + _name: f"x{index + 1}" for index, _name in enumerate(input_columns) + } + self.input_data_ref = data[input_columns].rename(columns=_col_name_map) self.output_data_ref = data[output_column] self.var_bounds = pyo.Param( ["lb", "ub"], @@ -42,9 +51,8 @@ def __init__( self.constant_bounds = pyo.Param( ["lb", "ub"], initialize={"lb": constant_bounds[0], "ub": constant_bounds[1]}, - mutable=True, ) - self.eps_val = pyo.Param(initialize=eps, mutable=True, domain=pyo.PositiveReals) + self.eps_val = pyo.Param(initialize=eps, domain=pyo.PositiveReals) self.min_tree_size = pyo.Param( initialize=1, mutable=True, @@ -63,7 +71,7 @@ def __init__( self.binary_operators_set = pyo.Set(initialize=binary_operators) self.unary_operators_set = pyo.Set(initialize=unary_operators) self.operators_set = self.binary_operators_set.union(self.unary_operators_set) - self.operands_set = pyo.Set(initialize=input_columns + ["cst"]) + self.operands_set = pyo.Set(initialize=list(_col_name_map.values()) + ["cst"]) self.collective_operator_set = self.operators_set.union(self.operands_set) self.non_terminal_nodes_set = pyo.RangeSet(2 ** (tree_depth - 1) - 1) @@ -74,7 +82,12 @@ def __init__( self._build_expression_tree_model() # Calculate the output value for each sample - self.samples = SampleBlock(self.input_data_ref.index.to_list()) + if model_type == "bigm": + self.samples = BigmSampleBlock(self.input_data_ref.index.to_list()) + elif model_type == "hull": + pass + else: + raise ValueError(f"model_type: {model_type} is not recognized.") def _build_expression_tree_model(self): """Builds constraints that return a valid expression tree""" @@ -154,7 +167,7 @@ def add_objective(self, objective_type="sse"): """ if objective_type == "sse": # build SSR objective - self.sse = pyo.Objective(expr=sum(self.samples[:].square_of_error)) + self.sse = pyo.Objective(expr=sum(self.samples[:].square_of_residual)) elif objective_type == "bic": # Build BIC objective @@ -190,26 +203,68 @@ def add_symmetry_breaking_cuts(self, sample_name=None): # cuts to the first sample sample_name = self.input_data_ref.iloc[0].name - blk = self.samples[sample_name] - symmetric_operators = [ - op for op in ["sum", "mult"] if op in self.binary_operators_set - ] + self.samples[sample_name].add_symmetry_breaking_cuts() def add_implication_cuts(self): """ Adds cuts to avoid a certain combinations of operators based on the input data """ + # Avoid division by zero + near_zero_operands, negative_operands = _get_operand_domains( + data=self.input_data_ref, tol=pyo.value(self.eps_val) + ) - def add_tree_size_constraint(self, size: int): - """Adds a constraint to constrain the size of the tree""" + if len(near_zero_operands) > 0 and "div" in self.binary_operators_set: + print("Data for one/more operands is close to zero") + + @self.Constraint(self.non_terminal_nodes_set, near_zero_operands) + def implication_cuts_div_operator(blk, n, op): + return ( + blk.select_operator[n, "div"] + blk.select_operator[2 * n + 1, op] + <= blk.select_node[n] + ) + + if len(negative_operands) > 0 and "sqrt" in self.unary_operators_set: + print("Data for one/more operands is negative") + + @self.Constraint(self.non_terminal_nodes_set, negative_operands) + def implication_cuts_sqrt_operator(blk, n, op): + return ( + blk.select_operator[n, "sqrt"] + blk.select_operator[2 * n + 1, op] + <= blk.select_node[n] + ) + + non_positive_operands = list(set(near_zero_operands + negative_operands)) + if len(non_positive_operands) > 0 and "log" in self.unary_operators_set: + print("Data for one/more operands is negative") + + @self.Constraint(self.non_terminal_nodes_set, non_positive_operands) + def implication_cuts_log_operator(blk, n, op): + return ( + blk.select_operator[n, "log"] + blk.select_operator[2 * n + 1, op] + <= blk.select_node[n] + ) + + def constrain_max_tree_size(self, size: int): + """Adds a constraint to constrain the maximum size of the tree""" self.max_tree_size = size # If the constraint exists, then updating the parameter value is sufficient # pylint: disable = attribute-defined-outside-init - if not hasattr(self, "tree_size_con"): - self.tree_size_constraint = Constraint( - expr=sum(self.select_node[n] for n in self.nodes_set) - <= self.max_tree_size + if not hasattr(self, "max_tree_size_constraint"): + self.max_tree_size_constraint = Constraint( + expr=sum(self.select_node[:]) <= self.max_tree_size + ) + + def constrain_min_tree_size(self, size: int): + """Adds a constraint to constrain the minimum size of the tree""" + self.min_tree_size = size + + # If the constraint exists, then updating the parameter value is sufficient + # pylint: disable = attribute-defined-outside-init + if not hasattr(self, "min_tree_size_constraint"): + self.min_tree_size_constraint = Constraint( + expr=sum(self.select_node[:]) >= self.min_tree_size ) def relax_integrality_constraints(self): @@ -225,7 +280,7 @@ def add_integrality_constraints(self): def relax_nonconvex_constraints(self): """Convexifies all non-convex nonlinear constraints""" for blk in self.component_data_objects(pyo.Block): - if isinstance(blk, BaseOperatorData): + if isinstance(blk, BigmOperatorData): blk.construct_convex_relaxation() def get_selected_operators(self): @@ -237,3 +292,17 @@ def get_selected_operators(self): def selected_tree_to_expression(self): """Returns the optimal expression as a string""" + + +def _get_operand_domains(data: pd.DataFrame, tol: float): + near_zero_operands = [] + negative_operands = [] + + for op in data.columns: + if (abs(data[op]) < tol).any(): + near_zero_operands.append(op) + + if (data[op] < 0).any(): + negative_operands.append(op) + + return near_zero_operands, negative_operands diff --git a/pySTAR/symbolic_regression_example.py b/pySTAR/symbolic_regression_example.py index e4a7dce..e858037 100644 --- a/pySTAR/symbolic_regression_example.py +++ b/pySTAR/symbolic_regression_example.py @@ -1,31 +1,30 @@ -from symbolic_regression import SymbolicRegressionModel import numpy as np import pandas as pd import pyomo.environ as pyo +from symbolic_regression import SymbolicRegressionModel np.random.seed(42) -data = pd.DataFrame(np.random.uniform(low=1, high=3, size=(10, 2)), columns=["x1", "x2"]) -data["y"] = 2.5 * np.exp(data["x1"]) * np.log(data["x2"]) +data = pd.DataFrame( + np.random.uniform(low=1, high=3, size=(10, 2)), columns=["x1", "x2"] +) +data["y"] = data["x1"] ** 2 + data["x2"] in_cols = ["x1", "x2"] -out_cols = "y" print(data) -# data = pd.read_csv("3_4_simulation_data_10.csv") -# in_cols = ["liquid_inlet_conc_mol_comp_H2SO4", "solid_inlet_flow_mass"] -# out_cols = "liquid_outlet_flow_vol" - def build_model(): m = SymbolicRegressionModel( data=data, input_columns=in_cols, - output_column=out_cols, + output_column="y", tree_depth=4, - unary_operators=["log", "exp"], - binary_operators=["sum", "diff", "mult"], - var_bounds=(-100, 100), + unary_operators=["square", "sqrt"], + binary_operators=["sum", "diff", "mult", "div"], + var_bounds=(-10, 10), constant_bounds=(-100, 100), + model_type="bigm", ) + m.add_objective() return m @@ -51,14 +50,10 @@ def build_model(): mdl.constant_val.pprint() results = pd.DataFrame() - results["sim_data"] = data[out_cols] - results["prediction"] = [ - mdl.samples[s].node[1].val_node.value - for s in mdl.samples - ] + results["sim_data"] = data["y"] + results["prediction"] = [mdl.samples[s].val_node[1].value for s in mdl.samples] results["square_of_error"] = [ - mdl.samples[s].square_of_error.expr() - for s in mdl.samples + mdl.samples[s].square_of_residual.expr() for s in mdl.samples ] print(results) diff --git a/pySTAR/utils.py b/pySTAR/utils.py index ee1a79c..d580175 100644 --- a/pySTAR/utils.py +++ b/pySTAR/utils.py @@ -1,12 +1,42 @@ from gurobipy import nlfunc import pyomo.environ as pyo -from pySTAR.operators.operators import ( - LogOperatorData, - ExpOperatorData, -) +from pySTAR.bigm_operators import LogOperatorData, ExpOperatorData +from pySTAR.symbolic_regression import SymbolicRegressionModel -def solve_with_gurobi(m): +def _bigm_gurobi_formulation(srm: SymbolicRegressionModel): + for blk in srm.component_data_objects(pyo.Block): + if isinstance(blk, (LogOperatorData, ExpOperatorData)): + # Deactivate the block + blk.deactivate() + + solver = pyo.SolverFactory("gurobi_persistent") + solver.set_instance(srm) + gm = solver._solver_model # Gurobipy model + pm_to_gm = solver._pyomo_var_to_solver_var_map + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + for blk in srm.component_data_objects(pyo.Block): + if isinstance(blk, LogOperatorData): + sb = blk.parent_block() # Sample block + val_node = sb.val_node + op_bin_var = dict(srm.select_operator[:, "log"].wildcard_items()) + + if vub >= 10: + _bigm = pyo.value(1e5 - vlb) + else: + _bigm = pyo.value(pyo.exp(vub) - vlb) + + gm.addConstr( + nlfunc.exp(pm_to_gm[val_node[n]]) - pm_to_gm[val_node[2 * n + 1]] + <= _bigm * (1 - pm_to_gm[op_bin_var[n]]) + for n in srm.non_terminal_nodes_set + ) + + return solver + + +def _hull_gurobi_formulation(m: SymbolicRegressionModel): """Uses Gurobibpy interface to solve the MINLP""" for blk in m.component_data_objects(pyo.Block): @@ -45,3 +75,20 @@ def solve_with_gurobi(m): if isinstance(blk, (LogOperatorData, ExpOperatorData)): # Deactivate the nonlinear constraint blk.evaluate_val_node.activate() + + return grb + + +def get_gurobi(srm: SymbolicRegressionModel, options: dict | None = None): + """Returns Gurobi solver object""" + if options is None: + # Set default termination criteria + options = {} + + if srm.method_type == "bigm": + solver = _bigm_gurobi_formulation(srm) + else: + solver = _hull_gurobi_formulation(srm) + + solver.options.update(options) + return solver From 91f96c0dce834c7197da51af23c5c65695f51da6 Mon Sep 17 00:00:00 2001 From: Radhakrishna Date: Thu, 17 Jul 2025 19:49:34 -0400 Subject: [PATCH 3/9] Used Pyomo's updated declare_custom_block decorator --- pySTAR/bigm_operators.py | 97 +++++++++++++++++++++++++++++------ pySTAR/symbolic_regression.py | 2 +- 2 files changed, 81 insertions(+), 18 deletions(-) diff --git a/pySTAR/bigm_operators.py b/pySTAR/bigm_operators.py index 438dfb3..d635a15 100644 --- a/pySTAR/bigm_operators.py +++ b/pySTAR/bigm_operators.py @@ -4,13 +4,12 @@ the big-m approach """ +import pandas as pd +from pyomo.core.base.block import BlockData, declare_custom_block import pyomo.environ as pyo -from custom_block.custom_block import BlockData, declare_custom_block -OPTION_NAMES = ["val_node", "op_bin_var"] - -@declare_custom_block("BaseOperator", model_options=OPTION_NAMES) +@declare_custom_block("BaseOperator", rule="build") class BaseOperatorData(BlockData): """Operator template for big-m type constraints""" @@ -46,8 +45,13 @@ def construct_convex_relaxation(self): """Constructs a convex relaxation of the operator-specific constraints""" raise NotImplementedError() + @staticmethod + def compute_node_value(right_child: float, left_child: float): + """Computes the value at a node based on the operator""" + raise NotImplementedError() + -@declare_custom_block("SumOperator", OPTION_NAMES) +@declare_custom_block("SumOperator", rule="build") class SumOperatorData(BaseOperatorData): """Adds constraints for the addition operator""" @@ -73,8 +77,12 @@ def construct_convex_relaxation(self): # Constraints are convex, so returning the block as it is pass + @staticmethod + def compute_node_value(right_child, left_child): + return right_child + left_child -@declare_custom_block("DiffOperator", OPTION_NAMES) + +@declare_custom_block("DiffOperator", rule="build") class DiffOperatorData(BaseOperatorData): """Adds constraints for the difference operator""" @@ -100,8 +108,12 @@ def construct_convex_relaxation(self): # Constraints are convex, so returning the block as it is pass + @staticmethod + def compute_node_value(right_child, left_child): + return right_child - left_child + -@declare_custom_block("MultOperator", OPTION_NAMES) +@declare_custom_block("MultOperator", rule="build") class MultOperatorData(BaseOperatorData): """Adds constraints for the multiplication operator""" @@ -131,8 +143,12 @@ def lower_bound_constraint(_, n): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(right_child, left_child): + return right_child * left_child -@declare_custom_block("DivOperator", OPTION_NAMES) + +@declare_custom_block("DivOperator", rule="build") class DivOperatorData(BaseOperatorData): """Adds constraints for the division operator""" @@ -160,7 +176,7 @@ def lower_bound_constraint(_, n): 1 - op_bin_var[n] ) - @self.Constraint(srm.non_terminal_modes_set) + @self.Constraint(srm.non_terminal_nodes_set) def avoid_zero_constraint(_, n): # Ensures that the denominator is away from zero # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 @@ -169,8 +185,12 @@ def avoid_zero_constraint(_, n): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(right_child, left_child): + return right_child / left_child + -@declare_custom_block("SqrtOperator", OPTION_NAMES) +@declare_custom_block("SqrtOperator", rule="build") class SqrtOperatorData(BaseOperatorData): """Adds constraints for the square root operator""" @@ -188,15 +208,19 @@ def lower_bound_constraint(_, n): bigm = -vub return val_node[n] ** 2 - val_node[2 * n + 1] >= bigm * (1 - op_bin_var[n]) - @self.Constraint(srm.non_terminal_modes_set) + @self.Constraint(srm.non_terminal_nodes_set) def non_negativity_constraint(_, n): return val_node[2 * n + 1] >= (1 - op_bin_var[n]) * vlb def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, left_child): + return pyo.sqrt(left_child) + -@declare_custom_block("ExpOperator", OPTION_NAMES) +@declare_custom_block("ExpOperator", rule="build") class ExpOperatorData(BaseOperatorData): """Adds constraints for the exponential operator""" @@ -226,8 +250,12 @@ def lower_bound_constraint(_, n): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, left_child): + return pyo.exp(left_child) -@declare_custom_block("LogOperator", OPTION_NAMES) + +@declare_custom_block("LogOperator", rule="build") class LogOperatorData(BaseOperatorData): """Adds constraints for the logarithm operator""" @@ -255,7 +283,7 @@ def lower_bound_constraint(_, n): 1 - op_bin_var[n] ) - @self.Constraint(srm.non_terminal_modes_set) + @self.Constraint(srm.non_terminal_nodes_set) def avoid_zero_constraint(_, n): # Ensures that the denominator is away from zero # return eps * op_bin_var[n] <= val_node[2 * n + 1] ** 2 @@ -264,8 +292,12 @@ def avoid_zero_constraint(_, n): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, left_child): + return pyo.log(left_child) + -@declare_custom_block("SquareOperator", OPTION_NAMES) +@declare_custom_block("SquareOperator", rule="build") class SquareOperatorData(BaseOperatorData): """Adds constraints for the square operator""" @@ -286,6 +318,10 @@ def lower_bound_constraint(_, n): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, left_child): + return left_child**2 + # pylint: disable = undefined-variable BIGM_OPERATORS = { @@ -300,7 +336,7 @@ def construct_convex_relaxation(self): } -@declare_custom_block("BigmSampleBlock") +@declare_custom_block("BigmSampleBlock", rule="build") class BigmSampleBlockData(BlockData): """Class for evaluating the expression tree for each sample""" @@ -346,7 +382,7 @@ def value_lower_bound_constraint(blk, n): # Get binary variables corresponding to the opeartor # and save them in a dictionary. Here, the keys are node # numbers and the values are Var objects. - op_bin_vars = dict(srm.select_operator[:, op].wildcard_items()) + op_bin_vars = {n: srm.select_operator[n, op] for n in srm.nodes_set} setattr( self, op + "_operator", @@ -374,3 +410,30 @@ def symmetry_breaking_constraints(blk, n): srm.select_node[n] - sum(srm.select_operator[n, op] for op in symmetric_operators) ) + + def compare_node_values(self): + """Compares the node values obtained from the model and manual calculation""" + srm = self.symbolic_regression_model + data = srm.input_data_ref.loc[self.index()] + true_value = { + n: srm.constant_val[n].value + + sum( + data[op] * srm.select_operator[n, op].value + for op in srm.operands_set + if op != "cst" + ) + for n in srm.nodes_set + } + for n in range(len(srm.non_terminal_nodes_set), 0, -1): + for op in srm.operators_set: + if srm.select_operator[n, op].value > 0.99: + # Operator is selector + true_value[n] += getattr(self, op + "_operator").compute_node_value( + true_value[2 * n], true_value[2 * n + 1] + ) + + computed_value = {n: self.val_node[n].value for n in self.val_node} + + return pd.DataFrame.from_dict( + {"True Value": true_value, "Computed Value": computed_value} + ) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index 225f924..4106cd5 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -52,7 +52,7 @@ def __init__( ["lb", "ub"], initialize={"lb": constant_bounds[0], "ub": constant_bounds[1]}, ) - self.eps_val = pyo.Param(initialize=eps, domain=pyo.PositiveReals) + self.eps_value = pyo.Param(initialize=eps, domain=pyo.PositiveReals) self.min_tree_size = pyo.Param( initialize=1, mutable=True, From 685876c76659eccc26970c55cf1b26398ce4cdf9 Mon Sep 17 00:00:00 2001 From: Radhakrishna Date: Thu, 17 Jul 2025 19:50:46 -0400 Subject: [PATCH 4/9] Removed custom block code --- pySTAR/custom_block/LICENSE.md | 39 ----- pySTAR/custom_block/__init__.py | 0 pySTAR/custom_block/custom_block.py | 146 ----------------- pySTAR/custom_block/tests/__init__.py | 0 .../custom_block/tests/test_custom_block.py | 148 ------------------ 5 files changed, 333 deletions(-) delete mode 100644 pySTAR/custom_block/LICENSE.md delete mode 100644 pySTAR/custom_block/__init__.py delete mode 100644 pySTAR/custom_block/custom_block.py delete mode 100644 pySTAR/custom_block/tests/__init__.py delete mode 100644 pySTAR/custom_block/tests/test_custom_block.py diff --git a/pySTAR/custom_block/LICENSE.md b/pySTAR/custom_block/LICENSE.md deleted file mode 100644 index c9c8b5d..0000000 --- a/pySTAR/custom_block/LICENSE.md +++ /dev/null @@ -1,39 +0,0 @@ -The source code in this folder is taken from the Pyomo repository (https://github.com/Pyomo/pyomo), -and modified as per the needs of PRIMO. See `pyomo/core/base/block.py` for the original source code. - -LICENSE -======= - -Copyright (c) 2008-2025 National Technology and Engineering Solutions of -Sandia, LLC . Under the terms of Contract DE-NA0003525 with National -Technology and Engineering Solutions of Sandia, LLC , the U.S. -Government retains certain rights in this software. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -* Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -* Neither the name of the Sandia National Laboratories nor the names of -its contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/pySTAR/custom_block/__init__.py b/pySTAR/custom_block/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pySTAR/custom_block/custom_block.py b/pySTAR/custom_block/custom_block.py deleted file mode 100644 index 8ffff52..0000000 --- a/pySTAR/custom_block/custom_block.py +++ /dev/null @@ -1,146 +0,0 @@ -# ___________________________________________________________________________ -# -# Pyomo: Python Optimization Modeling Objects -# Copyright (c) 2008-2025 -# National Technology and Engineering Solutions of Sandia, LLC -# Under the terms of Contract DE-NA0003525 with National Technology and -# Engineering Solutions of Sandia, LLC, the U.S. Government retains certain -# rights in this software. -# This software is distributed under the 3-clause BSD License. -# ___________________________________________________________________________ - - -""" -This module modifies the declare_custom_block decorator available in -pyomo.core.base.block to set a default rule argument. -""" - -# Standard libs -import sys - -# Installed libs -# pylint: disable = unused-import -# Although BlockData is not needed for this file, we are importing it so that -# both BlockData and declare_custom_block can be imported from this file when needed. -# Otherwise, BlockData needs to be imported separately from Pyomo. -from pyomo.core.base.block import ( - Block, - BlockData, - ScalarCustomBlockMixin, - UnindexedComponent_set, -) - - -def _default_rule(model_options): - """ - Default rule for custom blocks - - Parameters - ---------- - model_options : dict - Dictionary of options needed to construct the block model - """ - - def _rule(blk, *args): - try: - # Attempt to build the model - blk.build(*args, **model_options) - - except AttributeError: - # build method is not implemented in the BlockData class - # Returning an empty Pyomo Block - pass - - return _rule - - -# pylint: disable = protected-access, no-member -class CustomBlock(Block): - """The base class used by instances of custom block components""" - - def __init__(self, *args, **kwargs): - config = {key: kwargs.pop(key) for key in self._build_options if key in kwargs} - kwargs.setdefault("rule", _default_rule(config)) - - if self._default_ctype is not None: - kwargs.setdefault("ctype", self._default_ctype) - Block.__init__(self, *args, **kwargs) - - def __new__(cls, *args, **kwargs): - if cls.__bases__[0] is not CustomBlock: - # we are creating a class other than the "generic" derived - # custom block class. We can assume that the routing of the - # generic block class to the specific Scalar or Indexed - # subclass has already occurred and we can pass control up - # to (toward) object.__new__() - return super().__new__(cls, *args, **kwargs) - # If the first base class is this CustomBlock class, then the - # user is attempting to create the "generic" block class. - # Depending on the arguments, we need to map this to either the - # Scalar or Indexed block subclass. - if not args or (args[0] is UnindexedComponent_set and len(args) == 1): - return super().__new__(cls._scalar_custom_block, *args, **kwargs) - else: - return super().__new__(cls._indexed_custom_block, *args, **kwargs) - - -def declare_custom_block(name, model_options=None): - """Decorator to declare components for a custom block data class - - >>> @declare_custom_block(name="FooBlock") - ... class FooBlockData(BlockData): - ... # custom block data class - ... def build(self, *args, **kwargs): # Default rule argument - ... pass - """ - - def block_data_decorator(block_data): - # this is the decorator function that creates the block - # component classes - - # Declare the new Block component (derived from CustomBlock) - # corresponding to the BlockData that we are decorating - # - # Note the use of `type(CustomBlock)` to pick up the metaclass - # that was used to create the CustomBlock (in general, it should - # be `type`) - comp = type(CustomBlock)( - name, # name of new class - (CustomBlock,), # base classes - # class body definitions (populate the new class' __dict__) - { - # ensure the created class is associated with the calling module - "__module__": block_data.__module__, - # Default IndexedComponent data object is the decorated class: - "_ComponentDataClass": block_data, - # By default this new block does not declare a new ctype - "_default_ctype": None, - "_build_options": [] if model_options is None else model_options, - }, - ) - - # Declare Indexed and Scalar versions of the custom block. We - # will register them both with the calling module scope, and - # with the CustomBlock (so that CustomBlock.__new__ can route - # the object creation to the correct class) - comp._indexed_custom_block = type(comp)( - "Indexed" + name, - (comp,), - { # ensure the created class is associated with the calling module - "__module__": block_data.__module__ - }, - ) - comp._scalar_custom_block = type(comp)( - "Scalar" + name, - (ScalarCustomBlockMixin, block_data, comp), - { # ensure the created class is associated with the calling module - "__module__": block_data.__module__ - }, - ) - - # Register the new Block types in the same module as the BlockData - for _cls in (comp, comp._indexed_custom_block, comp._scalar_custom_block): - setattr(sys.modules[block_data.__module__], _cls.__name__, _cls) - return block_data - - return block_data_decorator diff --git a/pySTAR/custom_block/tests/__init__.py b/pySTAR/custom_block/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pySTAR/custom_block/tests/test_custom_block.py b/pySTAR/custom_block/tests/test_custom_block.py deleted file mode 100644 index e963360..0000000 --- a/pySTAR/custom_block/tests/test_custom_block.py +++ /dev/null @@ -1,148 +0,0 @@ -################################################################################# -# PRIMO - The P&A Project Optimizer was produced under the Methane Emissions -# Reduction Program (MERP) and National Energy Technology Laboratory's (NETL) -# National Emissions Reduction Initiative (NEMRI). -# -# NOTICE. This Software was developed under funding from the U.S. Government -# and the U.S. Government consequently retains certain rights. As such, the -# U.S. Government has been granted for itself and others acting on its behalf -# a paid-up, nonexclusive, irrevocable, worldwide license in the Software to -# reproduce, distribute copies to the public, prepare derivative works, and -# perform publicly and display publicly, and to permit others to do so. -################################################################################# - -""" -This module modifies the declare_custom_block decorator available in -pyomo.core.base.block to set a default rule argument. -""" - -# Installed libs -import pyomo.environ as pyo -import pytest - -# User-defined libs -from primo.custom_block.custom_block import BlockData, declare_custom_block - - -# pylint: disable = undefined-variable, attribute-defined-outside-init -# pylint: disable = missing-function-docstring -def test_custom_block_1(): - """Tests the decorator without the `build` method""" - - @declare_custom_block("FooBlock") - class FooBlockData(BlockData): - """Construct an empty custom block""" - - m = pyo.ConcreteModel() - m.blk_without_index = FooBlock() - m.blk_with_index = FooBlock([1, 2, 3, 4]) - - assert isinstance(m.blk_without_index, FooBlockData) - for p in m.blk_with_index: - assert isinstance(m.blk_with_index[p], FooBlockData) - assert len(m.blk_with_index) == 4 - - -def test_custom_block_2(): - """Tests the decorator with the `build` method, but without options""" - - @declare_custom_block("FooBlock") - class FooBlockData(BlockData): - """A dummy custom block""" - - def build(self, *args): - self.x = pyo.Var(list(args)) - self.y = pyo.Var() - - m = pyo.ConcreteModel() - m.blk_without_index = FooBlock() - m.blk_1 = FooBlock([1, 2, 3]) - m.blk_2 = FooBlock([4, 5], [6, 7]) - - assert isinstance(m.blk_without_index, FooBlockData) - for p in m.blk_1: - assert isinstance(m.blk_1[p], FooBlockData) - - for p in m.blk_2: - assert isinstance(m.blk_2[p], FooBlockData) - - assert hasattr(m.blk_without_index, "x") - assert hasattr(m.blk_without_index, "y") - - assert len(m.blk_1) == 3 - assert len(m.blk_2) == 4 - - assert len(m.blk_1[2].x) == 1 - assert len(m.blk_2[4, 6].x) == 2 - - -def test_custom_block_3(): - """Tests the decorator with the `build` method and model options""" - - @declare_custom_block("FooBlock") - class FooBlockData(BlockData): - """A dummy custom block""" - - def build(self, *args, capex, opex): - self.x = pyo.Var(list(args)) - self.y = pyo.Var() - - self.capex = capex - self.opex = opex - - options = {"capex": 42, "opex": 24} - - m = pyo.ConcreteModel() - m.blk_without_index = FooBlock(model_options=options) - m.blk_1 = FooBlock([1, 2, 3], model_options=options) - m.blk_2 = FooBlock([4, 5], [6, 7], model_options=options) - - assert isinstance(m.blk_without_index, FooBlockData) - for p in m.blk_1: - assert isinstance(m.blk_1[p], FooBlockData) - - for p in m.blk_2: - assert isinstance(m.blk_2[p], FooBlockData) - - assert hasattr(m.blk_without_index, "x") - assert hasattr(m.blk_without_index, "y") - assert m.blk_without_index.capex == 42 - assert m.blk_without_index.opex == 24 - - assert len(m.blk_1) == 3 - assert len(m.blk_2) == 4 - - assert len(m.blk_1[2].x) == 1 - assert len(m.blk_2[4, 6].x) == 2 - - assert m.blk_1[3].capex == 42 - assert m.blk_2[4, 7].opex == 24 - - with pytest.raises(TypeError): - # missing 2 required keyword-only arguments - m.blk_3 = FooBlock() - - -def test_custom_block_4(): - """Tests if the default rule can be overwritten""" - - @declare_custom_block("FooBlock") - class FooBlockData(BlockData): - """A dummy custom block""" - - def build(self, *args): - self.x = pyo.Var(list(args)) - self.y = pyo.Var() - - def _new_rule(blk): - blk.a = pyo.Var() - blk.b = pyo.Var() - - m = pyo.ConcreteModel() - m.blk = FooBlock(rule=_new_rule) - - assert isinstance(m.blk, FooBlockData) - assert not hasattr(m.blk, "x") - assert not hasattr(m.blk, "y") - assert isinstance(m.blk.a, pyo.Var) - assert isinstance(m.blk.b, pyo.Var) From 7f1a45b772efa92b3567f079d468b00fd62b1b88 Mon Sep 17 00:00:00 2001 From: Radhakrishna Date: Sun, 20 Jul 2025 16:09:40 -0400 Subject: [PATCH 5/9] Fixed a few typos and adding logging --- pySTAR/hull_operators.py | 62 +++++---------------- pySTAR/symbolic_regression.py | 79 ++++++++++++++++++--------- pySTAR/symbolic_regression_example.py | 3 +- pySTAR/utils.py | 19 ++++--- 4 files changed, 79 insertions(+), 84 deletions(-) diff --git a/pySTAR/hull_operators.py b/pySTAR/hull_operators.py index f8c9449..50a6a54 100644 --- a/pySTAR/hull_operators.py +++ b/pySTAR/hull_operators.py @@ -4,17 +4,14 @@ """ import logging -from pyomo.core.base.block import BlockData +from pyomo.core.base.block import BlockData, declare_custom_block from pyomo.environ import Var, Constraint import pyomo.environ as pyo -from pySTAR.operators.custom_block import declare_custom_block -from pySTAR.operators.relaxation import mccormick_relaxation - LOGGER = logging.getLogger(__name__) -@declare_custom_block("BaseOperator") +@declare_custom_block("BaseOperator", rule="build") class BaseOperatorData(BlockData): """Base class for defining operator models""" @@ -157,10 +154,7 @@ def build_operator_model(self): self.add_bound_constraints() def construct_convex_relaxation(self): - self.evaluate_val_node.deactivate() - mccormick_relaxation( - self, z=self.val_node, x=self.val_left_node, y=self.val_right_node - ) + raise NotImplementedError() @declare_custom_block("DivOperator") @@ -176,17 +170,7 @@ def build_operator_model(self): self.add_bound_constraints(val_right_node=False) def construct_convex_relaxation(self): - self.evaluate_val_node.deactivate() - self.calculate_val_right_node.deactivate() - mccormick_relaxation( - self, z=self.val_left_node, x=self.val_node, y=self.aux_var_right - ) - mccormick_relaxation( - self, - z=self.val_right_node, - x=self.get_operator_binary_var(), - y=self.aux_var_right, - ) + raise NotImplementedError() # pylint: disable = no-member @@ -206,17 +190,6 @@ def build_operator_model(self): ) def construct_convex_relaxation(self): - # Need to implement outer-approximation - # Update bounds on val_node and val_right_node variables - ub_val_node = self.val_node.ub - ub_val_right_node = self.val_right_node.ub - lb_val_right_node = self.val_right_node.lb - - self.val_node.setub(min(ub_val_node, ub_val_right_node**2)) - self.val_right_node.setub(min(pyo.sqrt(ub_val_node), ub_val_right_node)) - self.val_node.setlb(0) - self.val_right_node.setlb(max(-pyo.sqrt(ub_val_node), lb_val_right_node)) - raise NotImplementedError() @@ -236,18 +209,6 @@ def build_operator_model(self): ) def construct_convex_relaxation(self): - # Make the domain of the variable non-negative - ub_val_node = self.val_node.ub - ub_val_right_node = self.val_right_node.ub - - self.val_node.setub(min(ub_val_node, pyo.sqrt(ub_val_right_node))) - self.val_right_node.setub(min(ub_val_node**2, ub_val_right_node)) - self.val_right_node.setlb(0) - self.val_node.setlb(0) - - self.del_component(self.node_val_lb_con) - self.del_component(self.right_node_val_lb_con) - raise NotImplementedError() @@ -323,8 +284,10 @@ def construct_convex_relaxation(self): } -@declare_custom_block("SampleBlock") -class SampleBlockData(BlockData): +@declare_custom_block("HullSampleBlock", rule="build") +class HullSampleBlockData(BlockData): + """Class for evaluating the expression tree for each sample""" + def build(self, s): """rule for building the expression model for each sample""" pb = self.parent_block() @@ -362,7 +325,7 @@ def node(blk, n): setattr( self.node[n], "op_" + op, - OPERATOR_MODELS[op](config={"bin_var": pb.select_operator[n, op]}), + OPERATOR_MODELS[op](bin_var=pb.select_operator[n, op]), ) op_blocks.append(getattr(self.node[n], "op_" + op)) @@ -381,6 +344,9 @@ def node(blk, n): == sum(blk.val_right_node for blk in op_blocks) ) - self.square_of_error = pyo.Expression( - expr=(self.node[1].val_node - output_value) ** 2 + self.residual = pyo.Var(doc="Residual value for the sample") + self.calculate_residual = pyo.Constraint( + expr=self.residual == output_value - self.node[1].val_node, + doc="Computes the residual between prediction and the data", ) + self.square_of_residual = pyo.Expression(expr=self.residual**2) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index 4106cd5..4e8a84f 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -1,15 +1,19 @@ +import logging from typing import List, Optional import pandas as pd import pyomo.environ as pyo from pyomo.environ import Var, Constraint -# pylint: disable = no-name-in-module -from bigm_operators import ( - BigmSampleBlock, - BaseOperatorData as BigmOperatorData, -) +# pylint: disable = import-error +from bigm_operators import BigmSampleBlock +from hull_operators import HullSampleBlock +LOGGER = logging.getLogger(__name__) +SUPPORTED_UNARY_OPS = ["square", "sqrt", "log", "exp"] +SUPPORTED_BINARY_OPS = ["sum", "diff", "mult", "div"] + +# pylint: disable = logging-fstring-interpolation class SymbolicRegressionModel(pyo.ConcreteModel): """Builds the symbolic regression model for a given data""" @@ -20,8 +24,7 @@ def __init__( input_columns: List[str], output_column: str, tree_depth: int, - unary_operators: Optional[list] = None, - binary_operators: Optional[list] = None, + operators: Optional[list] = None, var_bounds: tuple = (-100, 100), constant_bounds: tuple = (-100, 100), eps: float = 1e-4, @@ -30,13 +33,29 @@ def __init__( ): super().__init__(*args, **kwds) - if unary_operators is None: - # If not specified, use all supported unary operators - unary_operators = ["square", "sqrt", "exp", "log"] + # Check if the received operators are supported or not + if operators is None: + # If not specified, use all supported operators + unary_operators = SUPPORTED_UNARY_OPS + binary_operators = SUPPORTED_BINARY_OPS - if binary_operators is None: - # If not specified, use all supported binary operators - binary_operators = ["sum", "diff", "mult", "div"] + else: + # Ensure that there are no duplicates in the list of operators + if len(operators) != len(set(operators)): + raise ValueError("Duplicates are present in the list of operators") + + unary_operators = [] + binary_operators = [] + for op in operators: + if op in SUPPORTED_UNARY_OPS: + unary_operators.append(op) + elif op in SUPPORTED_BINARY_OPS: + binary_operators.append(op) + else: + raise ValueError(f"Operator {op} is not recognized!") + + LOGGER.info(f"Using {unary_operators} unary operators.") + LOGGER.info(f"Using {binary_operators} binary operators.") # Save a reference to the input and output data _col_name_map = { @@ -85,7 +104,7 @@ def __init__( if model_type == "bigm": self.samples = BigmSampleBlock(self.input_data_ref.index.to_list()) elif model_type == "hull": - pass + self.samples = HullSampleBlock(self.input_data_ref.index.to_list()) else: raise ValueError(f"model_type: {model_type} is not recognized.") @@ -155,7 +174,7 @@ def constant_ub_con(blk, n): ) # pylint: disable = attribute-defined-outside-init - def add_objective(self, objective_type="sse"): + def add_objective(self, objective_type: str = "sse"): """Appends objective function to the model Parameters @@ -171,7 +190,7 @@ def add_objective(self, objective_type="sse"): elif objective_type == "bic": # Build BIC objective - pass + raise NotImplementedError("BIC is not supported currently") else: raise ValueError( @@ -211,11 +230,15 @@ def add_implication_cuts(self): """ # Avoid division by zero near_zero_operands, negative_operands = _get_operand_domains( - data=self.input_data_ref, tol=pyo.value(self.eps_val) + data=self.input_data_ref, tol=pyo.value(self.eps_value) ) + # pylint: disable = logging-fstring-interpolation if len(near_zero_operands) > 0 and "div" in self.binary_operators_set: - print("Data for one/more operands is close to zero") + LOGGER.info( + f"Data for operands {near_zero_operands} is close to zero. " + f"Adding cuts to prevent division with these operands." + ) @self.Constraint(self.non_terminal_nodes_set, near_zero_operands) def implication_cuts_div_operator(blk, n, op): @@ -225,7 +248,10 @@ def implication_cuts_div_operator(blk, n, op): ) if len(negative_operands) > 0 and "sqrt" in self.unary_operators_set: - print("Data for one/more operands is negative") + LOGGER.info( + f"Data for operands {negative_operands} is negative. " + f"Adding cuts to prevent sqrt of these operands." + ) @self.Constraint(self.non_terminal_nodes_set, negative_operands) def implication_cuts_sqrt_operator(blk, n, op): @@ -236,7 +262,10 @@ def implication_cuts_sqrt_operator(blk, n, op): non_positive_operands = list(set(near_zero_operands + negative_operands)) if len(non_positive_operands) > 0 and "log" in self.unary_operators_set: - print("Data for one/more operands is negative") + LOGGER.info( + f"Data for operands {non_positive_operands} is non-positive. " + f"Adding cuts to prevent log of these operands." + ) @self.Constraint(self.non_terminal_nodes_set, non_positive_operands) def implication_cuts_log_operator(blk, n, op): @@ -253,7 +282,8 @@ def constrain_max_tree_size(self, size: int): # pylint: disable = attribute-defined-outside-init if not hasattr(self, "max_tree_size_constraint"): self.max_tree_size_constraint = Constraint( - expr=sum(self.select_node[:]) <= self.max_tree_size + expr=sum(self.select_node[n] for n in self.nodes_set) + <= self.max_tree_size ) def constrain_min_tree_size(self, size: int): @@ -264,7 +294,8 @@ def constrain_min_tree_size(self, size: int): # pylint: disable = attribute-defined-outside-init if not hasattr(self, "min_tree_size_constraint"): self.min_tree_size_constraint = Constraint( - expr=sum(self.select_node[:]) >= self.min_tree_size + expr=sum(self.select_node[n] for n in self.nodes_set) + >= self.min_tree_size ) def relax_integrality_constraints(self): @@ -279,9 +310,7 @@ def add_integrality_constraints(self): def relax_nonconvex_constraints(self): """Convexifies all non-convex nonlinear constraints""" - for blk in self.component_data_objects(pyo.Block): - if isinstance(blk, BigmOperatorData): - blk.construct_convex_relaxation() + raise NotImplementedError("Model Convexification is not currently supported") def get_selected_operators(self): """ diff --git a/pySTAR/symbolic_regression_example.py b/pySTAR/symbolic_regression_example.py index e858037..d4d9daf 100644 --- a/pySTAR/symbolic_regression_example.py +++ b/pySTAR/symbolic_regression_example.py @@ -18,8 +18,7 @@ def build_model(): input_columns=in_cols, output_column="y", tree_depth=4, - unary_operators=["square", "sqrt"], - binary_operators=["sum", "diff", "mult", "div"], + operators=["sum", "diff", "mult", "div", "square", "sqrt"], var_bounds=(-10, 10), constant_bounds=(-100, 100), model_type="bigm", diff --git a/pySTAR/utils.py b/pySTAR/utils.py index d580175..b03266b 100644 --- a/pySTAR/utils.py +++ b/pySTAR/utils.py @@ -1,12 +1,13 @@ from gurobipy import nlfunc import pyomo.environ as pyo -from pySTAR.bigm_operators import LogOperatorData, ExpOperatorData +from bigm_operators import LogOperatorData as BigmLog, ExpOperatorData as BigmExp +from hull_operators import LogOperatorData as HullLog, ExpOperatorData as HullExp from pySTAR.symbolic_regression import SymbolicRegressionModel def _bigm_gurobi_formulation(srm: SymbolicRegressionModel): for blk in srm.component_data_objects(pyo.Block): - if isinstance(blk, (LogOperatorData, ExpOperatorData)): + if isinstance(blk, (BigmLog, BigmExp)): # Deactivate the block blk.deactivate() @@ -17,10 +18,10 @@ def _bigm_gurobi_formulation(srm: SymbolicRegressionModel): vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] for blk in srm.component_data_objects(pyo.Block): - if isinstance(blk, LogOperatorData): + if isinstance(blk, BigmLog): sb = blk.parent_block() # Sample block val_node = sb.val_node - op_bin_var = dict(srm.select_operator[:, "log"].wildcard_items()) + op_bin_var = {n: srm.select_operator[n, "log"] for n in srm.nodes_set} if vub >= 10: _bigm = pyo.value(1e5 - vlb) @@ -40,7 +41,7 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): """Uses Gurobibpy interface to solve the MINLP""" for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, (LogOperatorData, ExpOperatorData)): + if isinstance(blk, (HullLog, HullExp)): # Deactivate the nonlinear constraint blk.evaluate_val_node.deactivate() @@ -51,7 +52,7 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): pm_to_gm = grb._pyomo_var_to_solver_var_map for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, LogOperatorData): + if isinstance(blk, HullLog): # Add the nonlinear constraint gm.addConstr( pm_to_gm[blk.val_node] @@ -59,7 +60,7 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): * nlfunc.log(pm_to_gm[blk.aux_var_right]) ) - elif isinstance(blk, ExpOperatorData): + elif isinstance(blk, HullExp): # Add the nonlinear constraint gm.addConstr( pm_to_gm[blk.val_node] @@ -72,7 +73,7 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): # Activate the constraint back for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, (LogOperatorData, ExpOperatorData)): + if isinstance(blk, (HullLog, HullExp)): # Deactivate the nonlinear constraint blk.evaluate_val_node.activate() @@ -83,7 +84,7 @@ def get_gurobi(srm: SymbolicRegressionModel, options: dict | None = None): """Returns Gurobi solver object""" if options is None: # Set default termination criteria - options = {} + options = {"MIPGap": 0.01, "TimeLimit": 3600} if srm.method_type == "bigm": solver = _bigm_gurobi_formulation(srm) From 2b30e9293ad27c2c17f5a0786d0b2bd38e15b02d Mon Sep 17 00:00:00 2001 From: radhakrishnatg Date: Sat, 4 Oct 2025 13:25:59 -0400 Subject: [PATCH 6/9] Added auxiliary variables for exp and log functions --- pySTAR/bigm_operators.py | 153 +++++++++++++++++++++++++++++++++- pySTAR/symbolic_regression.py | 1 + 2 files changed, 150 insertions(+), 4 deletions(-) diff --git a/pySTAR/bigm_operators.py b/pySTAR/bigm_operators.py index d635a15..86a9b79 100644 --- a/pySTAR/bigm_operators.py +++ b/pySTAR/bigm_operators.py @@ -220,8 +220,8 @@ def compute_node_value(_, left_child): return pyo.sqrt(left_child) -@declare_custom_block("ExpOperator", rule="build") -class ExpOperatorData(BaseOperatorData): +@declare_custom_block("ExpOldOperator", rule="build") +class ExpOldOperatorData(BaseOperatorData): """Adds constraints for the exponential operator""" def build(self, *args, val_node: pyo.Var, op_bin_var: dict): @@ -255,8 +255,78 @@ def compute_node_value(_, left_child): return pyo.exp(left_child) -@declare_custom_block("LogOperator", rule="build") -class LogOperatorData(BaseOperatorData): +@declare_custom_block("ExpCompOperator", rule="build") +class ExpCompOperatorData(BaseOperatorData): + """Adds constraints for the exponential operator""" + + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): + srm = self.symbolic_regression_model + + @self.Constraint(srm.non_terminal_nodes_set) + def complementarity_constraint(_, n): + return op_bin_var[n] * (val_node[n] - pyo.exp(val_node[2 * n + 1])) == 0 + + def construct_convex_relaxation(self): + raise NotImplementedError() + + @staticmethod + def compute_node_value(_, left_child): + return pyo.exp(left_child) + + +@declare_custom_block("ExpOperator", rule="build") +class ExpOperatorData(BaseOperatorData): + """Adds constraints for the exponential operator""" + + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + # pylint: disable = attribute-defined-outside-init + self.aux_var_exp = pyo.Var( + srm.non_terminal_nodes_set, + bounds=(max(-10, vlb), pyo.log(vub)), + doc="Auxiliary variable for modeling exp function", + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def func_upper_bound_constraint(blk, n): + bigm = vub - 0 + return val_node[n] - pyo.exp(blk.aux_var_exp[n]) <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def func_lower_bound_constraint(blk, n): + bigm = vlb - vub + return val_node[n] - pyo.exp(blk.aux_var_exp[n]) >= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def var_upper_bound_constraint(blk, n): + bigm = vub - blk.aux_var_exp[n].lb + return val_node[2 * n + 1] - blk.aux_var_exp[n] <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def var_lower_bound_constraint(blk, n): + bigm = vlb - blk.aux_var_exp[n].ub + return val_node[2 * n + 1] - blk.aux_var_exp[n] >= bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + @staticmethod + def compute_node_value(_, left_child): + return pyo.exp(left_child) + + +@declare_custom_block("LogOldOperator", rule="build") +class LogOldOperatorData(BaseOperatorData): """Adds constraints for the logarithm operator""" def build(self, *args, val_node: pyo.Var, op_bin_var: dict): @@ -297,6 +367,77 @@ def compute_node_value(_, left_child): return pyo.log(left_child) +@declare_custom_block("LogCompOperator", rule="build") +class LogCompOperatorData(BaseOperatorData): + """Adds constraints for the exponential operator""" + + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): + srm = self.symbolic_regression_model + + @self.Constraint(srm.non_terminal_nodes_set) + def complementarity_constraint(_, n): + # Need to transform the equation to avoid log domain error + return op_bin_var[n] * (pyo.exp(val_node[n]) - val_node[2 * n + 1]) == 0 + + def construct_convex_relaxation(self): + raise NotImplementedError() + + @staticmethod + def compute_node_value(_, left_child): + return pyo.log(left_child) + + +@declare_custom_block("LogOperator", rule="build") +class LogOperatorData(BaseOperatorData): + """Adds constraints for the exponential operator""" + + def build(self, *args, val_node: pyo.Var, op_bin_var: dict): + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + + # pylint: disable = attribute-defined-outside-init + self.aux_var_log = pyo.Var( + srm.non_terminal_nodes_set, + bounds=(srm.eps_value, vub), + doc="Auxiliary variable for modeling log function", + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def func_upper_bound_constraint(blk, n): + bigm = vub - pyo.log(blk.aux_var_log[n].lb) + return val_node[n] - pyo.log(blk.aux_var_log[n]) <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def func_lower_bound_constraint(blk, n): + bigm = vlb - pyo.log(blk.aux_var_log[n].ub) + return val_node[n] - pyo.log(blk.aux_var_log[n]) >= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def var_upper_bound_constraint(blk, n): + bigm = vub - blk.aux_var_log[n].lb + return val_node[2 * n + 1] - blk.aux_var_log[n] <= bigm * ( + 1 - op_bin_var[n] + ) + + @self.Constraint(srm.non_terminal_nodes_set) + def var_lower_bound_constraint(blk, n): + bigm = vlb - blk.aux_var_log[n].ub + return val_node[2 * n + 1] - blk.aux_var_log[n] >= bigm * ( + 1 - op_bin_var[n] + ) + + def construct_convex_relaxation(self): + raise NotImplementedError() + + @staticmethod + def compute_node_value(_, left_child): + return pyo.log(left_child) + + @declare_custom_block("SquareOperator", rule="build") class SquareOperatorData(BaseOperatorData): """Adds constraints for the square operator""" @@ -331,7 +472,11 @@ def compute_node_value(_, left_child): "div": DivOperator, "sqrt": SqrtOperator, "log": LogOperator, + "log_comp": LogCompOperator, + "log_old": LogOldOperator, "exp": ExpOperator, + "exp_comp": ExpCompOperator, + "exp_old": ExpOldOperator, "square": SquareOperator, } diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index 4e8a84f..9cd6ce0 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -11,6 +11,7 @@ LOGGER = logging.getLogger(__name__) SUPPORTED_UNARY_OPS = ["square", "sqrt", "log", "exp"] SUPPORTED_BINARY_OPS = ["sum", "diff", "mult", "div"] +SUPPORTED_UNARY_OPS += ["exp_old", "exp_comp", "log_old", "log_comp"] # pylint: disable = logging-fstring-interpolation From e2e9feac40963c7bc7dda1b8655c69ab1aedde28 Mon Sep 17 00:00:00 2001 From: radhakrishnatg Date: Sat, 4 Oct 2025 19:58:18 -0400 Subject: [PATCH 7/9] Cleaned up hull formulation and added a few redundancy cuts --- pySTAR/hull_operators.py | 195 ++++++++++++++++++++++------------ pySTAR/symbolic_regression.py | 27 +++++ 2 files changed, 153 insertions(+), 69 deletions(-) diff --git a/pySTAR/hull_operators.py b/pySTAR/hull_operators.py index 50a6a54..a05b52e 100644 --- a/pySTAR/hull_operators.py +++ b/pySTAR/hull_operators.py @@ -4,6 +4,7 @@ """ import logging +import pandas as pd from pyomo.core.base.block import BlockData, declare_custom_block from pyomo.environ import Var, Constraint import pyomo.environ as pyo @@ -15,20 +16,12 @@ class BaseOperatorData(BlockData): """Base class for defining operator models""" - _operator_name = "base_operator" _is_unary_operator = False _define_aux_var_right = False @property - def regression_model(self): - """ - Returns the symbolic regression model - - Model structure: - main_model -> Sample block -> Node block -> Operator block - So, calling the parent_block method thrice returns the - SymbolicRegressionModel object. - """ + def symbolic_regression_model(self): + """Returns a pointer to the symbolic regression model""" return self._bin_var_ref.parent_block() @property @@ -36,8 +29,8 @@ def operator_binary(self): """Returns the binary variable associated with the operator""" return self._bin_var_ref - # pylint: disable = attribute-defined-outside-init - def build(self, bin_var: Var): + # pylint: disable = attribute-defined-outside-init, unused-argument + def build(self, *args, bin_var: Var): """rule for constructing operator blocks""" srm = bin_var.parent_block() # Symbolic regression model lb = srm.var_bounds["lb"] @@ -58,13 +51,13 @@ def build(self, bin_var: Var): # Declare an auxiliary variable for operators with singularity issue self.aux_var_right = Var( doc="Auxiliary variable for the right child", - bounds=(eps, ub), # Ensure that the RHS is strictly positive + bounds=(eps, max(1, ub)), # Ensure that the RHS is strictly positive ) - # Define val_right_node = aux_var_right * binary_var - self.calculate_val_right_node = Constraint( - expr=self.val_right_node == bin_var * self.aux_var_right - ) + # Ensure that aux_var_right = (val_right_node if bin_var = 1 else 1) + self.calculate_aux_var_right = Constraint( + expr=self.aux_var_right == self.val_right_node + 1 - bin_var + ) # Add operator-specific constraints try: @@ -108,14 +101,17 @@ def construct_convex_relaxation(self): """Constructs a convex relaxation of the model""" raise NotImplementedError("Convex relaxation is not supported") + @staticmethod + def compute_node_value(left_child, right_child): + """Helper function to evaluate the value for a given operator""" + raise NotImplementedError("Helper function is not implemented") + # pylint: disable = attribute-defined-outside-init, missing-class-docstring -@declare_custom_block("SumOperator") +@declare_custom_block("SumOperator", rule="build") class SumOperatorData(BaseOperatorData): - _operator_name = "sum" def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" self.evaluate_val_node = Constraint( expr=self.val_node == self.val_left_node + self.val_right_node ) @@ -125,13 +121,15 @@ def construct_convex_relaxation(self): # Operator model is convex by default, so return. pass + @staticmethod + def compute_node_value(left_child, right_child): + return left_child + right_child -@declare_custom_block("DiffOperator") + +@declare_custom_block("DiffOperator", rule="build") class DiffOperatorData(BaseOperatorData): - _operator_name = "diff" def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" self.evaluate_val_node = Constraint( expr=self.val_node == self.val_left_node - self.val_right_node ) @@ -141,13 +139,15 @@ def construct_convex_relaxation(self): # Operator model is convex by default, so return. pass + @staticmethod + def compute_node_value(left_child, right_child): + return left_child - right_child + -@declare_custom_block("MultOperator") +@declare_custom_block("MultOperator", rule="build") class MultOperatorData(BaseOperatorData): - _operator_name = "mult" def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" self.evaluate_val_node = Constraint( expr=self.val_node == self.val_left_node * self.val_right_node ) @@ -156,121 +156,129 @@ def build_operator_model(self): def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(left_child, right_child): + return left_child * right_child -@declare_custom_block("DivOperator") + +@declare_custom_block("DivOperator", rule="build") class DivOperatorData(BaseOperatorData): - _operator_name = "div" _define_aux_var_right = True def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" self.evaluate_val_node = Constraint( expr=self.val_node * self.aux_var_right == self.val_left_node ) - self.add_bound_constraints(val_right_node=False) + self.add_bound_constraints() def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(left_child, right_child): + return left_child / right_child + # pylint: disable = no-member -@declare_custom_block("SquareOperator") +@declare_custom_block("SquareOperator", rule="build") class SquareOperatorData(BaseOperatorData): - _operator_name = "square" _is_unary_operator = True def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" - # For unary operator, left node is fixed to zero. - self.add_bound_constraints(val_left_node=False) - # Evaluate the value at the node self.evaluate_val_node = Constraint( expr=self.val_node == self.val_right_node * self.val_right_node ) + # val_node will be non-negative in this case, so update lb + self.val_node.setlb(0) + + # For unary operator, left node is fixed to zero. + self.add_bound_constraints(val_left_node=False) + def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, right_child): + return right_child**2 + -@declare_custom_block("SqrtOperator") +@declare_custom_block("SqrtOperator", rule="build") class SqrtOperatorData(BaseOperatorData): - _operator_name = "sqrt" _is_unary_operator = True def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" - self.add_bound_constraints(val_left_node=False) - # Expressing the constraint in this manner makes it a # quadratic constraint, instead of a general nonlinear constraint self.evaluate_val_node = Constraint( expr=self.val_node * self.val_node == self.val_right_node ) + # val_right_node must be non-negative in this case + self.val_right_node.setlb(0) + self.add_bound_constraints(val_left_node=False) + def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, right_child): + return pyo.sqrt(right_child) + -@declare_custom_block("ExpOperator") +@declare_custom_block("ExpOperator", rule="build") class ExpOperatorData(BaseOperatorData): - _operator_name = "exp" _is_unary_operator = True def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" # Update the domain of variables ub_val_node = self.val_node.ub ub_val_right_node = self.val_right_node.ub - self.val_node.setub(min(ub_val_node, pyo.exp(ub_val_right_node))) self.val_right_node.setub(min(pyo.log(ub_val_node), ub_val_right_node)) self.val_node.setlb(0) # To avoid numerical issues, we do not let the lower bound of the # argument of the exp function go below -10 - if self.val_right_node.lb < -10: - self.val_right_node.setlb(-10) - + self.val_right_node.setlb(max(-10, self.val_right_node.lb)) self.add_bound_constraints(val_left_node=False) - self.del_component(self.node_val_lb_con) - # Exponential term does not become zero, so multiply it - # with the operator binary variable to make it zero + # If the operator is not chosen, then val_right_node = 0, + # exp(val_right_node) = 1. So, we add (bin_var - 1) to make the + # expression evaluate to zero. self.evaluate_val_node = Constraint( - expr=self.val_node == self._bin_var_ref * pyo.exp(self.val_right_node) + expr=self.val_node == pyo.exp(self.val_right_node) + self._bin_var_ref - 1 ) def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, right_child): + return pyo.exp(right_child) + -@declare_custom_block("LogOperator") +@declare_custom_block("LogOperator", rule="build") class LogOperatorData(BaseOperatorData): - _operator_name = "log" _is_unary_operator = True _define_aux_var_right = True def build_operator_model(self): - """Constructs constraint that evaluates the value at the node""" - # Update bounds on variables - ub_val_node = self.val_node.ub - ub_val_right_node = self.aux_var_right.ub - - self.val_node.setub(min(ub_val_node, pyo.log(ub_val_right_node))) - self.aux_var_right.setub(min(pyo.exp(ub_val_node), ub_val_right_node)) - self.val_right_node.setub(self.aux_var_right.ub) - - # Log term need not vanish when the operator is not selected, so - # multiply it with the operator binary variable to make it zero - bin_var = self._bin_var_ref + # If the operator is not selected, aux_var_right = 1, so log vanishes self.evaluate_val_node = Constraint( - expr=self.val_node == bin_var * pyo.log(self.aux_var_right) + expr=self.val_node == pyo.log(self.aux_var_right) ) + self.add_bound_constraints(val_left_node=False) + def construct_convex_relaxation(self): raise NotImplementedError() + @staticmethod + def compute_node_value(_, right_child): + return pyo.log(right_child) + OPERATOR_MODELS = { "sum": SumOperator, @@ -288,6 +296,11 @@ def construct_convex_relaxation(self): class HullSampleBlockData(BlockData): """Class for evaluating the expression tree for each sample""" + @property + def symbolic_regression_model(self): + """Returns a pointer to the symbolic regression model""" + return self.parent_block() + def build(self, s): """rule for building the expression model for each sample""" pb = self.parent_block() @@ -324,10 +337,10 @@ def node(blk, n): # Build operator blocks for all unary and binary operators setattr( self.node[n], - "op_" + op, + op + "_operator", OPERATOR_MODELS[op](bin_var=pb.select_operator[n, op]), ) - op_blocks.append(getattr(self.node[n], "op_" + op)) + op_blocks.append(getattr(self.node[n], op + "_operator")) self.node[n].evaluate_val_node_var = Constraint( expr=self.node[n].val_node @@ -350,3 +363,47 @@ def node(blk, n): doc="Computes the residual between prediction and the data", ) self.square_of_residual = pyo.Expression(expr=self.residual**2) + + def add_symmetry_breaking_cuts(self): + """Adds symmetry breaking cuts to the sample""" + srm = self.symbolic_regression_model + vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] + symmetric_operators = [ + op for op in ["sum", "mult"] if op in srm.binary_operators_set + ] + + @self.Constraint(srm.non_terminal_nodes_set) + def symmetry_breaking_constraints(blk, n): + return blk.node[2 * n].val_node - blk.node[2 * n + 1].val_node >= ( + vlb - vub + ) * ( + srm.select_node[n] + - sum(srm.select_operator[n, op] for op in symmetric_operators) + ) + + def compare_node_values(self): + """Compares the node values obtained from the model and manual calculation""" + srm = self.symbolic_regression_model + data = srm.input_data_ref.loc[self.index()] + true_value = { + n: srm.constant_val[n].value + + sum( + data[op] * srm.select_operator[n, op].value + for op in srm.operands_set + if op != "cst" + ) + for n in srm.nodes_set + } + for n in range(len(srm.non_terminal_nodes_set), 0, -1): + for op in srm.operators_set: + if srm.select_operator[n, op].value > 0.99: + # Operator is selector + true_value[n] += getattr( + self.node[n], op + "_operator" + ).compute_node_value(true_value[2 * n], true_value[2 * n + 1]) + + computed_value = {n: self.node[n].val_node.value for n in self.node} + + return pd.DataFrame.from_dict( + {"True Value": true_value, "Computed Value": computed_value} + ) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index 9cd6ce0..c44020d 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -216,6 +216,33 @@ def redundant_bin_op_same_operand(blk, n, op): # Remove Associative operator combinations + # Remove composition of inverse functions exp(log(.)) square(sqrt(.)) + def _inverse_function_rule(blk, n, op_1, op_2): + if op_1 == op_2 or (2 * n + 1) in self.terminal_nodes_set: + # An example for op_1 = op_2: exp(exp(.)) or log(log(.)) + # Also, unary operator cannot be present in a terminal node + return Constraint.Skip + return ( + blk.select_operator[n, op_1] + blk.select_operator[2 * n + 1, op_2] + <= blk.select_node[n] + ) + + if "exp" in self.unary_operators_set and "log" in self.unary_operators_set: + self.redundant_inv_op_exp_log = Constraint( + self.non_terminal_nodes_set, + ["exp", "log"], + ["exp", "log"], + rule=_inverse_function_rule, + ) + + if "square" in self.unary_operators_set and "sqrt" in self.unary_operators_set: + self.redundant_inv_op_square_sqrt = Constraint( + self.non_terminal_nodes_set, + ["square", "sqrt"], + ["square", "sqrt"], + rule=_inverse_function_rule, + ) + def add_symmetry_breaking_cuts(self, sample_name=None): """Adds cuts to eliminate symmetrical trees from the model""" if sample_name is None: From 3a179dd9d00471e9b61cf48647221c469a2d075c Mon Sep 17 00:00:00 2001 From: radhakrishnatg Date: Sat, 4 Oct 2025 20:05:48 -0400 Subject: [PATCH 8/9] Fixed a bug on naming convention --- pySTAR/bigm_operators.py | 50 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/pySTAR/bigm_operators.py b/pySTAR/bigm_operators.py index 86a9b79..78077b5 100644 --- a/pySTAR/bigm_operators.py +++ b/pySTAR/bigm_operators.py @@ -46,7 +46,7 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(right_child: float, left_child: float): + def compute_node_value(left_child: float, right_child: float): """Computes the value at a node based on the operator""" raise NotImplementedError() @@ -78,8 +78,8 @@ def construct_convex_relaxation(self): pass @staticmethod - def compute_node_value(right_child, left_child): - return right_child + left_child + def compute_node_value(left_child, right_child): + return left_child + right_child @declare_custom_block("DiffOperator", rule="build") @@ -109,8 +109,8 @@ def construct_convex_relaxation(self): pass @staticmethod - def compute_node_value(right_child, left_child): - return right_child - left_child + def compute_node_value(left_child, right_child): + return left_child - right_child @declare_custom_block("MultOperator", rule="build") @@ -144,8 +144,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(right_child, left_child): - return right_child * left_child + def compute_node_value(left_child, right_child): + return left_child * right_child @declare_custom_block("DivOperator", rule="build") @@ -186,8 +186,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(right_child, left_child): - return right_child / left_child + def compute_node_value(left_child, right_child): + return left_child / right_child @declare_custom_block("SqrtOperator", rule="build") @@ -216,8 +216,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.sqrt(left_child) + def compute_node_value(_, right_child): + return pyo.sqrt(right_child) @declare_custom_block("ExpOldOperator", rule="build") @@ -251,8 +251,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.exp(left_child) + def compute_node_value(_, right_child): + return pyo.exp(right_child) @declare_custom_block("ExpCompOperator", rule="build") @@ -270,8 +270,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.exp(left_child) + def compute_node_value(_, right_child): + return pyo.exp(right_child) @declare_custom_block("ExpOperator", rule="build") @@ -321,8 +321,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.exp(left_child) + def compute_node_value(_, right_child): + return pyo.exp(right_child) @declare_custom_block("LogOldOperator", rule="build") @@ -363,8 +363,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.log(left_child) + def compute_node_value(_, right_child): + return pyo.log(right_child) @declare_custom_block("LogCompOperator", rule="build") @@ -383,8 +383,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.log(left_child) + def compute_node_value(_, right_child): + return pyo.log(right_child) @declare_custom_block("LogOperator", rule="build") @@ -434,8 +434,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return pyo.log(left_child) + def compute_node_value(_, right_child): + return pyo.log(right_child) @declare_custom_block("SquareOperator", rule="build") @@ -460,8 +460,8 @@ def construct_convex_relaxation(self): raise NotImplementedError() @staticmethod - def compute_node_value(_, left_child): - return left_child**2 + def compute_node_value(_, right_child): + return right_child**2 # pylint: disable = undefined-variable From 5cba231b6e7e3e5bec46b40557666f6f9ed0e977 Mon Sep 17 00:00:00 2001 From: radhakrishnatg Date: Sat, 4 Oct 2025 22:40:45 -0400 Subject: [PATCH 9/9] Added support for Gurobi solver --- pySTAR/symbolic_regression.py | 22 ++++++- pySTAR/symbolic_regression_example.py | 16 ++--- pySTAR/utils.py | 87 ++++++++++++++++++--------- 3 files changed, 85 insertions(+), 40 deletions(-) diff --git a/pySTAR/symbolic_regression.py b/pySTAR/symbolic_regression.py index c44020d..1cc4e81 100644 --- a/pySTAR/symbolic_regression.py +++ b/pySTAR/symbolic_regression.py @@ -5,7 +5,7 @@ from pyomo.environ import Var, Constraint # pylint: disable = import-error -from bigm_operators import BigmSampleBlock +from bigm_operators import BigmSampleBlock, BigmSampleBlockData from hull_operators import HullSampleBlock LOGGER = logging.getLogger(__name__) @@ -34,6 +34,7 @@ def __init__( ): super().__init__(*args, **kwds) + self.model_type = model_type # Check if the received operators are supported or not if operators is None: # If not specified, use all supported operators @@ -350,6 +351,25 @@ def get_selected_operators(self): def selected_tree_to_expression(self): """Returns the optimal expression as a string""" + def get_parity_plot_data(self): + """Returns a DataFrame containing actual outputs and predicted outputs""" + results = pd.DataFrame() + results["sim_data"] = self.output_data_ref + first_sample_name = self.input_data_ref.iloc[0].name + if isinstance(self.samples[first_sample_name], BigmSampleBlockData): + results["prediction"] = [ + self.samples[s].val_node[1].value for s in self.samples + ] + else: + results["prediction"] = [ + self.samples[s].node[1].val_node.value for s in self.samples + ] + results["square_of_error"] = [ + self.samples[s].square_of_residual.expr() for s in self.samples + ] + + return results + def _get_operand_domains(data: pd.DataFrame, tol: float): near_zero_operands = [] diff --git a/pySTAR/symbolic_regression_example.py b/pySTAR/symbolic_regression_example.py index d4d9daf..a5b197f 100644 --- a/pySTAR/symbolic_regression_example.py +++ b/pySTAR/symbolic_regression_example.py @@ -2,6 +2,7 @@ import pandas as pd import pyomo.environ as pyo from symbolic_regression import SymbolicRegressionModel +from utils import get_gurobi np.random.seed(42) data = pd.DataFrame( @@ -39,20 +40,13 @@ def build_model(): solver.solve(mdl, tee=True) elif solver == "gurobi": - mdl.solve_with_gurobi() + solver = get_gurobi(mdl) + solver.solve(tee=True) elif solver == "baron": solver = pyo.SolverFactory("gams") solver.solve(mdl, solver="baron", tee=True) - print(mdl.get_selected_operators()) mdl.constant_val.pprint() - - results = pd.DataFrame() - results["sim_data"] = data["y"] - results["prediction"] = [mdl.samples[s].val_node[1].value for s in mdl.samples] - results["square_of_error"] = [ - mdl.samples[s].square_of_residual.expr() for s in mdl.samples - ] - - print(results) + print(mdl.get_parity_plot_data()) + print(mdl.get_selected_operators()) diff --git a/pySTAR/utils.py b/pySTAR/utils.py index b03266b..3e5a12c 100644 --- a/pySTAR/utils.py +++ b/pySTAR/utils.py @@ -1,15 +1,16 @@ from gurobipy import nlfunc import pyomo.environ as pyo -from bigm_operators import LogOperatorData as BigmLog, ExpOperatorData as BigmExp -from hull_operators import LogOperatorData as HullLog, ExpOperatorData as HullExp -from pySTAR.symbolic_regression import SymbolicRegressionModel +import bigm_operators as bop +import hull_operators as hop +from symbolic_regression import SymbolicRegressionModel def _bigm_gurobi_formulation(srm: SymbolicRegressionModel): for blk in srm.component_data_objects(pyo.Block): - if isinstance(blk, (BigmLog, BigmExp)): - # Deactivate the block - blk.deactivate() + if isinstance(blk, (bop.ExpOperatorData, bop.LogOperatorData)): + # Deactivate the nonlinear constraints + blk.func_upper_bound_constraint.deactivate() + blk.func_lower_bound_constraint.deactivate() solver = pyo.SolverFactory("gurobi_persistent") solver.set_instance(srm) @@ -18,22 +19,56 @@ def _bigm_gurobi_formulation(srm: SymbolicRegressionModel): vlb, vub = srm.var_bounds["lb"], srm.var_bounds["ub"] for blk in srm.component_data_objects(pyo.Block): - if isinstance(blk, BigmLog): + if isinstance(blk, bop.LogOperatorData): sb = blk.parent_block() # Sample block val_node = sb.val_node op_bin_var = {n: srm.select_operator[n, "log"] for n in srm.nodes_set} - if vub >= 10: - _bigm = pyo.value(1e5 - vlb) - else: - _bigm = pyo.value(pyo.exp(vub) - vlb) + aux_vars = gm.addVars(list(srm.non_terminal_nodes_set)) + gm.addConstrs( + aux_vars[n] == nlfunc.log(pm_to_gm[blk.aux_var_log[n]]) + for n in srm.non_terminal_nodes_set + ) + gm.addConstrs( + pm_to_gm[val_node[n]] - aux_vars[n] + <= (vub - pyo.log(blk.aux_var_log[n].lb)) + * (1 - pm_to_gm[op_bin_var[n]]) + for n in srm.non_terminal_nodes_set + ) + gm.addConstrs( + pm_to_gm[val_node[n]] - aux_vars[n] + >= (vlb - pyo.log(blk.aux_var_log[n].ub)) + * (1 - pm_to_gm[op_bin_var[n]]) + for n in srm.non_terminal_nodes_set + ) - gm.addConstr( - nlfunc.exp(pm_to_gm[val_node[n]]) - pm_to_gm[val_node[2 * n + 1]] - <= _bigm * (1 - pm_to_gm[op_bin_var[n]]) + if isinstance(blk, bop.ExpOperatorData): + sb = blk.parent_block() # Sample block + val_node = sb.val_node + op_bin_var = {n: srm.select_operator[n, "exp"] for n in srm.nodes_set} + + aux_vars = gm.addVars(list(srm.non_terminal_nodes_set)) + gm.addConstrs( + aux_vars[n] == nlfunc.exp(pm_to_gm[blk.aux_var_exp[n]]) + for n in srm.non_terminal_nodes_set + ) + gm.addConstrs( + pm_to_gm[val_node[n]] - aux_vars[n] + <= (vub - 0) * (1 - pm_to_gm[op_bin_var[n]]) + for n in srm.non_terminal_nodes_set + ) + gm.addConstrs( + pm_to_gm[val_node[n]] - aux_vars[n] + >= (vlb - vub) * (1 - pm_to_gm[op_bin_var[n]]) for n in srm.non_terminal_nodes_set ) + for blk in srm.component_data_objects(pyo.Block): + if isinstance(blk, (bop.ExpOperatorData, bop.LogOperatorData)): + # Activate the nonlinear constraints + blk.func_upper_bound_constraint.activate() + blk.func_lower_bound_constraint.activate() + return solver @@ -41,7 +76,7 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): """Uses Gurobibpy interface to solve the MINLP""" for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, (HullLog, HullExp)): + if isinstance(blk, (hop.ExpOperatorData, hop.LogOperatorData)): # Deactivate the nonlinear constraint blk.evaluate_val_node.deactivate() @@ -52,29 +87,25 @@ def _hull_gurobi_formulation(m: SymbolicRegressionModel): pm_to_gm = grb._pyomo_var_to_solver_var_map for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, HullLog): + if isinstance(blk, hop.LogOperatorData): # Add the nonlinear constraint gm.addConstr( - pm_to_gm[blk.val_node] - == pm_to_gm[blk.operator_binary] - * nlfunc.log(pm_to_gm[blk.aux_var_right]) + pm_to_gm[blk.val_node] == nlfunc.log(pm_to_gm[blk.aux_var_right]) ) - elif isinstance(blk, HullExp): + elif isinstance(blk, hop.ExpOperatorData): # Add the nonlinear constraint gm.addConstr( pm_to_gm[blk.val_node] - == pm_to_gm[blk.operator_binary] - * nlfunc.exp(pm_to_gm[blk.val_right_node]) + == nlfunc.exp(pm_to_gm[blk.val_right_node]) + + pm_to_gm[blk.operator_binary] + - 1 ) - # Solve the optimization model - grb.solve(tee=True) - # Activate the constraint back for blk in m.component_data_objects(pyo.Block): - if isinstance(blk, (HullLog, HullExp)): - # Deactivate the nonlinear constraint + if isinstance(blk, (hop.LogOperatorData, hop.ExpOperatorData)): + # Activate the nonlinear constraint blk.evaluate_val_node.activate() return grb @@ -86,7 +117,7 @@ def get_gurobi(srm: SymbolicRegressionModel, options: dict | None = None): # Set default termination criteria options = {"MIPGap": 0.01, "TimeLimit": 3600} - if srm.method_type == "bigm": + if srm.model_type == "bigm": solver = _bigm_gurobi_formulation(srm) else: solver = _hull_gurobi_formulation(srm)