class Softmax(nn.Layer):
    r"""
    Softmax Activation.

    This operator implements the softmax layer. The calculation process is as follows:

    1. The dimension :attr:`dim` of ``input`` will be permuted to the last.

    2. Then ``input`` will be logically flattened to a 2-D matrix. The matrix's second
       dimension(row length) is the same as the dimension :attr:`dim` of ``input``,
       and the first dimension(column length) is the product of all other dimensions
       of ``input``. For each row of the matrix, the softmax operator squashes the
       K-dimensional(K is the width of the matrix, which is also the size of ``input``'s
       dimension :attr:`dim`) vector of arbitrary real values to a K-dimensional
       vector of real values in the range [0, 1] that add up to 1.

    3. After the softmax operation is completed, the inverse operations of steps 1 and 2
       are performed to restore the two-dimensional matrix to the same dimension as the ``input`` .

    For every entry of a row, the operator computes the exponential of that
    entry and the sum of the exponentials of all entries in the same row.
    The ratio of the two is the output of the softmax operator.

    For each row :math:`i` and each column :math:`j` in the matrix, we have:

    .. math::

        Softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j \exp(x[i, j])}

    Example:

    .. code-block:: text

        Case 1:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]

          Attrs:
            dim = -1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
                        [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]

        Case 2:
          Input:
            x.shape = [2, 3, 4]
            x.data = [[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]]
          Attrs:
            dim = 1

          Output:
            out.shape = [2, 3, 4]
            out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
                         [0.01786798, 0.01786798, 0.04661262, 0.04661262],
                         [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
                        [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
                         [0.26762315, 0.26762315, 0.26762315, 0.26762315],
                         [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]

    Parameters:
        dim (int, optional): The dim along which to perform softmax
            calculations. It should be in range [-D, D), where D is the
            dimensions of ``input`` . If ``dim`` < 0, it works the same way as
            :math:`dim + D` . Default is None, in which case the value is
            forwarded unchanged and ``functional.softmax`` picks the
            dimension (in the example below, a 3-D input with no ``dim`` is
            normalized over dimension 0).

    Shape:
        - input: Tensor with any shape.
        - output: Tensor with the same shape as input.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
            ...                        [3.0, 4.0, 5.0, 6.0],
            ...                        [7.0, 8.0, 8.0, 9.0]],
            ...                       [[1.0, 2.0, 3.0, 4.0],
            ...                        [5.0, 6.0, 7.0, 8.0],
            ...                        [6.0, 7.0, 8.0, 9.0]]], dtype='float32')
            >>> m = paddle.compat.nn.Softmax()
            >>> out = m(x)
            >>> print(out)
            Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[[0.73105854, 0.73105854, 0.73105854, 0.73105854],
              [0.11920292, 0.11920292, 0.11920292, 0.11920292],
              [0.73105854, 0.73105854, 0.50000000, 0.50000000]],
             [[0.26894143, 0.26894143, 0.26894143, 0.26894143],
              [0.88079703, 0.88079703, 0.88079703, 0.88079703],
              [0.26894143, 0.26894143, 0.50000000, 0.50000000]]])

    """

    # The Paddle-style ``axis`` keyword is declared illegal here so callers
    # are pointed at ``paddle.nn.Softmax``; this layer only accepts ``dim``.
    @ForbidKeywordsDecorator(
        illegal_keys={"axis"},
        func_name="paddle.compat.nn.Softmax",
        correct_name="paddle.nn.Softmax",
    )
    def __init__(self, dim: int | None = None) -> None:
        """Store the dimension along which softmax will be computed.

        Args:
            dim: Dimension to normalize over, or None to defer the choice
                to ``functional.softmax``.
        """
        super().__init__()
        self._dim = dim
        # NOTE(review): the layer dtype is reset to None and is not used by
        # ``forward`` in this class -- confirm whether this is still needed.
        self._dtype = None

    def forward(self, input: Tensor) -> Tensor:
        """Apply ``functional.softmax`` to ``input`` along the stored dim."""
        return functional.softmax(input, self._dim)

    def extra_repr(self) -> str:
        """Return the ``dim=...`` fragment shown in the layer's repr."""
        # The value is stored as ``_dim``; there is no ``dim`` attribute, so
        # reading ``self.dim`` here would raise AttributeError.
        return f"dim={self._dim}"
import unittest

import numpy as np

import paddle


class TestCompatSoftmax(unittest.TestCase):
    """Check ``paddle.compat.nn.Softmax`` against ``paddle.nn.Softmax``."""

    def _assert_same_output(self, compat_layer, origin_layer, input_tensor):
        """Assert both layers produce numerically close outputs."""
        expected_res = origin_layer(input_tensor).numpy()
        np.testing.assert_allclose(
            compat_layer(input_tensor).numpy(),
            expected_res,
            rtol=1e-6,
            atol=1e-6,
        )

    def _compare_with_origin(self, input_tensor, axis):
        """Compare ``Softmax(dim=axis)`` with ``paddle.nn.Softmax(axis=axis)``."""
        self._assert_same_output(
            paddle.compat.nn.Softmax(dim=axis),
            paddle.nn.Softmax(axis=axis),
            input_tensor,
        )

    def test_compare_with_origin(self):
        """Outputs match for explicit axes and for default construction."""
        # (input shape, dtype, axes to check)
        cases = [
            ((3, 4), paddle.float32, (0, 1, -1)),
            ((2, 3, 4), paddle.float64, (0, 1, 2, -1)),
            ((2, 3, 4, 5), paddle.float32, (1, -2)),
        ]
        for input_shape, dtype, axes in cases:
            input_tensor = paddle.randn(input_shape, dtype=dtype)
            for axis in axes:
                # subTest makes a failure report the offending shape/axis.
                with self.subTest(input_shape=input_shape, axis=axis):
                    self._compare_with_origin(input_tensor, axis=axis)

        # Default-constructed layers.
        # NOTE(review): only a 2-D input is checked here; whether the two
        # defaults agree for other ranks is not established by this test.
        input_tensor = paddle.randn((2, 3), dtype=paddle.float32)
        self._assert_same_output(
            paddle.compat.nn.Softmax(),
            paddle.nn.Softmax(),
            input_tensor,
        )

    def test_error_handling(self):
        """Passing the Paddle-style ``axis`` keyword raises a TypeError."""
        msg_gt_1 = "paddle.compat.nn.Softmax() received unexpected keyword argument 'axis'. \nDid you mean to use paddle.nn.Softmax() instead?"

        with self.assertRaises(TypeError) as cm:
            paddle.compat.nn.Softmax(axis=1)
        # Must run after the ``with`` block: statements following the raising
        # call inside the block would never execute.
        self.assertEqual(str(cm.exception), msg_gt_1)


if __name__ == "__main__":
    unittest.main()