[Bugfix] Fix soc_version handling for 310P

zhangxinyuehfad · zhangxinyuehfad · commit d145a4dcd9e1 · 2025-09-08T16:09:12.000+08:00
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
diff --git a/tests/ut/test_utils.py b/tests/ut/test_utils.py
@@ -311,6 +311,91 @@ def test_register_ascend_customop(self, mock_ascend_rmsnorm,
         # should not register_oot again, thus only called three in this ut
         self.assertEqual(mock_customop.register_oot.call_count, 12)
 
+    def test_nd_to_nz_spec(self):
+        """Check nd_to_nz_spec's output shape and the zero fill around the data."""
+        # Expected shapes: 64 columns -> 4 blocks of 16; 62 -> 64 and 30 -> 32.
+        for rows, cols in ((32, 64), (30, 62)):
+            nz = utils.nd_to_nz_spec(torch.ones(rows, cols, dtype=torch.bool))
+            self.assertEqual(nz.shape, (1, 4, 32, 16))
+
+        # A 16x16 all-ones mask must keep its ones in the leading 16x16 corner
+        # of block 0, with zeros in the rest of block 0 and in any later block.
+        nz = utils.nd_to_nz_spec(torch.ones(16, 16, dtype=torch.bool))
+        first_block, later_blocks = nz[0, 0], nz[0, 1:]
+        self.assertTrue(torch.all(first_block[:16, :16] == 1))
+        self.assertTrue(torch.all(first_block[16:, :] == 0))
+        self.assertTrue(torch.all(later_blocks == 0))
+
+    def test_dispose_tensor(self):
+        """After dispose_tensor the tensor is empty and its data pointer changed."""
+        tensor = torch.ones(10, 10)
+        old_ptr = tensor.data_ptr()
+        utils.dispose_tensor(tensor)
+        self.assertEqual((tensor.numel(), tensor.data_ptr() != old_ptr), (0, True))
+
+    def test_npu_prefetch(self):
+        """Smoke test: npu_prefetch accepts both enabled=True and enabled=False."""
+        input_tensor = torch.ones(10, device='npu')
+        dependency = torch.ones(5, device='npu')
+        for flag in (True, False):
+            utils.npu_prefetch(input_tensor, dependency, enabled=flag)
+
+
+    def test_init_ascend_soc_version(self):
+        """init_ascend_soc_version maps each mocked soc_version to its enum member."""
+        test_cases = [
+            (220, utils.AscendSocVersion.A2),
+            (225, utils.AscendSocVersion.A2),
+            (250, utils.AscendSocVersion.A3),
+            (255, utils.AscendSocVersion.A3),
+            (202, utils.AscendSocVersion.P3),
+            (999, utils.AscendSocVersion.UNDEFINED),
+        ]
+        # patch.object starts each case from None and restores the module global on exit.
+        for soc_version, expected in test_cases:
+            with self.subTest(soc_version=soc_version):
+                with mock.patch('torch_npu.npu.get_soc_version', return_value=soc_version):
+                    with mock.patch.object(utils, '_ascend_soc_version', None):
+                        utils.init_ascend_soc_version()
+                        self.assertEqual(utils.get_ascend_soc_version(), expected)
+
+    def test_get_ascend_soc_version(self):
+        """get_ascend_soc_version asserts while unset, else returns the stored value."""
+        with mock.patch.object(utils, '_ascend_soc_version', None):
+            with self.assertRaises(AssertionError):
+                utils.get_ascend_soc_version()
+        with mock.patch.object(utils, '_ascend_soc_version', utils.AscendSocVersion.A2):
+            self.assertEqual(utils.get_ascend_soc_version(), utils.AscendSocVersion.A2)
+
+    def test_lmhead_tp_enable(self):
+        """lmhead_tp_enable is truthy for a TP size of 2 and falsy when it is None."""
+        with mock.patch('vllm_ascend.utils.get_ascend_config') as mock_config:
+            ascend_config = mock_config.return_value
+            for tp_size, expected in ((2, True), (None, False)):
+                ascend_config.lmhead_tensor_parallel_size = tp_size
+                self.assertEqual(bool(utils.lmhead_tp_enable()), expected)
+
+    def test_oproj_tp_enable(self):
+        """oproj_tp_enable is truthy for a TP size of 2 and falsy when it is None."""
+        with mock.patch('vllm_ascend.utils.get_ascend_config') as mock_config:
+            ascend_config = mock_config.return_value
+            for tp_size, expected in ((2, True), (None, False)):
+                ascend_config.oproj_tensor_parallel_size = tp_size
+                self.assertEqual(bool(utils.oproj_tp_enable()), expected)
+
+    def test_mlp_tp_enable(self):
+        """mlp_tp_enable is truthy for VLLM_ASCEND_ENABLE_MLP_OPTIMIZE='1', falsy for '0'."""
+        # mock.patch.dict puts os.environ back as it was after each case.
+        for raw_value, expected in (('1', True), ('0', False)):
+            with mock.patch.dict(os.environ, {'VLLM_ASCEND_ENABLE_MLP_OPTIMIZE': raw_value}):
+                self.assertEqual(bool(utils.mlp_tp_enable()), expected)
+
+    def test_matmul_allreduce_enable(self):
+        """matmul_allreduce_enable is truthy for the env flag '1' and falsy for '0'."""
+        # mock.patch.dict puts os.environ back as it was after each case.
+        for raw_value, expected in (('1', True), ('0', False)):
+            with mock.patch.dict(os.environ, {'VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE': raw_value}):
+                self.assertEqual(bool(utils.matmul_allreduce_enable()), expected)
 
 class TestProfileExecuteDuration(TestBase):
 
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
@@ -49,6 +49,7 @@
 
 ASCEND_QUANTIZATION_METHOD = "ascend"
 SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
+ASCEND_310P_SOC_VERSION = 202  # soc_version code that init_ascend_soc_version maps to AscendSocVersion.P3
 
 ACL_FORMAT_FRACTAL_ND = 2
 ACL_FORMAT_FRACTAL_NZ = 29
@@ -535,7 +536,8 @@ def register_ascend_customop():
 class AscendSocVersion(Enum):
     A2 = 0
     A3 = 1
-    UNDEFINED = 2
+    P3 = 2  # 310P: selected when soc_version == ASCEND_310P_SOC_VERSION
+    UNDEFINED = 3  # fallback for unrecognised soc_version; NOTE(review): was 2 -- confirm nothing relies on the number
 
 
 _ascend_soc_version = None
@@ -548,6 +550,8 @@ def init_ascend_soc_version():
         _ascend_soc_version = AscendSocVersion.A2
     elif 250 <= soc_version <= 255:
         _ascend_soc_version = AscendSocVersion.A3
+    elif soc_version == ASCEND_310P_SOC_VERSION:
+        _ascend_soc_version = AscendSocVersion.P3
     else:
         _ascend_soc_version = AscendSocVersion.UNDEFINED
 
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -1465,6 +1465,8 @@ def _select_moe_comm_method(self, num_tokens: int) -> str:
                 moe_comm_method = "mc2"
             else:
                 moe_comm_method = "allgather"
+        elif soc_version in {AscendSocVersion.P3}:
+            moe_comm_method = "allgather"
         elif soc_version in {AscendSocVersion.A3}:
             moe_comm_method = "mc2" if num_tokens <= self.mc2_tokens_capacity else "alltoall"
         else: