diff --git a/python/paddle/compat/nn/__init__.py b/python/paddle/compat/nn/__init__.py
index 2c0241fe5377f7..b420ff2ac47ec0 100644
--- a/python/paddle/compat/nn/__init__.py
+++ b/python/paddle/compat/nn/__init__.py
@@ -40,6 +40,7 @@
 __all__ = [
     'Unfold',
     'Linear',
+    'Softmax',
     'AvgPool1D',
     'AvgPool2D',
     'AvgPool3D',
@@ -400,9 +401,6 @@ def __setstate__(self, state):
         self.__dict__.setdefault("count_include_pad", True)
 
 
-__all__ = ['Unfold', 'Linear', 'MultiheadAttention']
-
-
 class Unfold(nn.Unfold):
     """
     A compatible version of paddle.nn.Unfold:
@@ -466,7 +464,6 @@ def to_list_if_necessary(x):
             strides=to_list_if_necessary(self.strides),
             paddings=to_list_if_necessary(self.paddings),
             dilations=to_list_if_necessary(self.dilations),
-            name=self.name,
         )
 
 
@@ -613,6 +610,135 @@ def reset_parameters(self) -> None:
             nn.init.uniform_(self.bias, -bound, bound)
 
 
+class Softmax(nn.Layer):
+    r"""
+    Softmax Activation.
+
+    This operator implements the softmax layer. The calculation process is as follows:
+
+    1. The dimension :attr:`dim` of ``input`` will be permuted to the last.
+
+    2. Then ``input`` will be logically flattened to a 2-D matrix. The matrix's second
+    dimension(row length) is the same as the dimension :attr:`dim` of ``input``,
+    and the first dimension(column length) is the product of all other dimensions
+    of ``input``. For each row of the matrix, the softmax operator squashes the
+    K-dimensional(K is the width of the matrix, which is also the size of ``input``'s
+    dimension :attr:`dim`) vector of arbitrary real values to a K-dimensional
+    vector of real values in the range [0, 1] that add up to 1.
+
+    3. After the softmax operation is completed, the inverse operations of steps 1 and 2
+    are performed to restore the two-dimensional matrix to the same dimension as the ``input`` .
+
+    For each element of such a K-dimensional vector, the output of the softmax operator is the
+    ratio of the exponential of that element to the sum of the exponentials of all K elements.
+
+    For each row :math:`i` and each column :math:`j` in the matrix, we have:
+
+    .. math::
+
+        Softmax[i, j] = \frac{\exp(x[i, j])}{\sum_k \exp(x[i, k])}
+
+    Example:
+
+    .. code-block:: text
+
+        Case 1:
+          Input:
+            x.shape = [2, 3, 4]
+            x.data = [[[2.0, 3.0, 4.0, 5.0],
+                       [3.0, 4.0, 5.0, 6.0],
+                       [7.0, 8.0, 8.0, 9.0]],
+                      [[1.0, 2.0, 3.0, 4.0],
+                       [5.0, 6.0, 7.0, 8.0],
+                       [6.0, 7.0, 8.0, 9.0]]]
+
+          Attrs:
+            dim = -1
+
+          Output:
+            out.shape = [2, 3, 4]
+            out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                         [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
+                        [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
+
+        Case 2:
+          Input:
+            x.shape = [2, 3, 4]
+            x.data = [[[2.0, 3.0, 4.0, 5.0],
+                       [3.0, 4.0, 5.0, 6.0],
+                       [7.0, 8.0, 8.0, 9.0]],
+                      [[1.0, 2.0, 3.0, 4.0],
+                       [5.0, 6.0, 7.0, 8.0],
+                       [6.0, 7.0, 8.0, 9.0]]]
+          Attrs:
+            dim = 1
+
+          Output:
+            out.shape = [2, 3, 4]
+            out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
+                         [0.01786798, 0.01786798, 0.04661262, 0.04661262],
+                         [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
+                        [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
+                         [0.26762315, 0.26762315, 0.26762315, 0.26762315],
+                         [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]
+
+    Parameters:
+        dim (int, optional): The dim along which to perform softmax
+            calculations. It should be in range [-D, D), where D is the
+            dimensions of ``input`` . If ``dim`` < 0, it works the same way as
+            :math:`dim + D` . Default is None.
+
+    Shape:
+        - input: Tensor with any shape.
+        - output: Tensor with the same shape as input.
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
+            ...                        [3.0, 4.0, 5.0, 6.0],
+            ...                        [7.0, 8.0, 8.0, 9.0]],
+            ...                       [[1.0, 2.0, 3.0, 4.0],
+            ...                        [5.0, 6.0, 7.0, 8.0],
+            ...                        [6.0, 7.0, 8.0, 9.0]]], dtype='float32')
+            >>> m = paddle.compat.nn.Softmax()
+            >>> out = m(x)
+            >>> print(out)
+            Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[[0.73105854, 0.73105854, 0.73105854, 0.73105854],
+              [0.11920292, 0.11920292, 0.11920292, 0.11920292],
+              [0.73105854, 0.73105854, 0.50000000, 0.50000000]],
+             [[0.26894143, 0.26894143, 0.26894143, 0.26894143],
+              [0.88079703, 0.88079703, 0.88079703, 0.88079703],
+              [0.26894143, 0.26894143, 0.50000000, 0.50000000]]])
+
+    """
+
+    @ForbidKeywordsDecorator(
+        illegal_keys={"axis"},
+        func_name="paddle.compat.nn.Softmax",
+        correct_name="paddle.nn.Softmax",
+    )
+    def __init__(self, dim: int | None = None) -> None:
+        """Store the softmax dimension ``dim``; the keyword ``axis`` is listed as illegal above."""
+        super().__init__()
+        self._dim = dim
+        self._dtype = None
+
+    def forward(self, input: Tensor) -> Tensor:
+        """Apply ``functional.softmax`` to ``input`` along the stored dimension."""
+        return functional.softmax(input, self._dim)
+
+    def extra_repr(self) -> str:
+        """Return the extra representation string of this layer, e.g. ``dim=1``."""
+        return f"dim={self._dim}"
+
+
 AvgPool1d = AvgPool1D
 AvgPool2d = AvgPool2D
 AvgPool3d = AvgPool3D
diff --git a/test/legacy_test/test_compat_softmax.py b/test/legacy_test/test_compat_softmax.py
new file mode 100644
index 00000000000000..0fd618de8e2bd3
--- /dev/null
+++ b/test/legacy_test/test_compat_softmax.py
@@ -0,0 +1,75 @@
+#   Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+
+import numpy as np
+
+import paddle
+
+
+class TestCompatSoftmax(unittest.TestCase):
+    """Compare paddle.compat.nn.Softmax with paddle.nn.Softmax."""
+
+    def _assert_same_output(self, compat_layer, origin_layer, input_tensor):
+        """Assert that both layers give numerically close outputs."""
+        np.testing.assert_allclose(
+            compat_layer(input_tensor).numpy(),
+            origin_layer(input_tensor).numpy(),
+            rtol=1e-6,
+            atol=1e-6,
+        )
+
+    def _compare_with_origin(self, input_tensor, axis):
+        """Compare ``Softmax(dim=axis)`` with the original ``Softmax(axis=axis)``."""
+        self._assert_same_output(
+            paddle.compat.nn.Softmax(dim=axis),
+            paddle.nn.Softmax(axis=axis),
+            input_tensor,
+        )
+
+    def test_compare_with_origin(self):
+        """Explicit dimensions agree across ranks, dtypes and negative indices."""
+        # Each case: (input shape, dtype, dimensions to check).
+        cases = [
+            ((3, 4), paddle.float32, (0, 1, -1)),
+            ((2, 3, 4), paddle.float64, (0, 1, 2, -1)),
+            ((2, 3, 4, 5), paddle.float32, (1, -2)),
+        ]
+        for shape, dtype, axes in cases:
+            input_tensor = paddle.randn(shape, dtype=dtype)
+            for axis in axes:
+                with self.subTest(shape=shape, axis=axis):
+                    self._compare_with_origin(input_tensor, axis=axis)
+
+    def test_default_dim(self):
+        """Layers constructed without arguments agree on a 2-D input."""
+        input_tensor = paddle.randn((2, 3), dtype=paddle.float32)
+        self._assert_same_output(
+            paddle.compat.nn.Softmax(),
+            paddle.nn.Softmax(),
+            input_tensor,
+        )
+
+    def test_error_handling(self):
+        """The ``axis`` keyword is rejected with a hint to use paddle.nn.Softmax."""
+        msg_gt_1 = "paddle.compat.nn.Softmax() received unexpected keyword argument 'axis'. \nDid you mean to use paddle.nn.Softmax() instead?"
+
+        with self.assertRaises(TypeError) as cm:
+            # Construction alone must raise, so the result is not bound to a name.
+            paddle.compat.nn.Softmax(axis=1)
+        self.assertEqual(str(cm.exception), msg_gt_1)
+
+
+if __name__ == "__main__":
+    unittest.main()