From 3a679ffe81e2192e47fb3407edea69f32ddf2b35 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Fri, 19 Jul 2024 17:02:51 +0800
Subject: [PATCH 01/12] change tests model list to debug ascend ci

---
 .../test_one_iter_traditional_model_list.yaml | 150 +++++++++---------
 1 file changed, 75 insertions(+), 75 deletions(-)

diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index 54585ffcc..fcb32782b 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -1,98 +1,98 @@
 camb:
     # # transformers
-    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
 
     # # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
-    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
-      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
-    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
-    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
+    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
+    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
 
     # # mmdetection
-    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
-    - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
-    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
+    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+    # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
+    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
     # model problem
     # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
-    - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
-      precision: {atol: 0.025, metric: 0.02, rtol: 0.02}
-    - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
-    - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
+    # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
+    #   precision: {atol: 0.025, metric: 0.02, rtol: 0.02}
+    # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
+    # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
 
     # # mmdetection3d
-    - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
+    # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
 
     # # mmsegmentation
-    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
-    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
-    - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
+    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
+    # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
 
     # # mmpose
-    - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
+    # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
 
-    # # # mmaction2
-    - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
+    # # # # mmaction2
+    # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
 
-    # # # mmocr
-    - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
-    - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
+    # # # # mmocr
+    # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
+    # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
 
-    # # mmyolo
-    - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
+    # # # mmyolo
+    # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
 
-    # # DI-engine
-    - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
-    - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
+    # # # DI-engine
+    # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
+    # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
 
 cuda:
     # transformers
-    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
-    # # mmpretrain
-    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
-    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
-    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
-    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
-    # mmdetection
-    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
-    - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
-    - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
-    - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
-    - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
-    - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
-    - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
-    # mmpose
-    - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
-    # mmaction2
-    # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
-    # mmocr
-    - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
-    - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
-    # mmsegmentation
-    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
-    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
-    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
-    # mmyolo
-    - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
-    # mmdetection3d
-    - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
-    # DI-engine
-    - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
-    - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
+    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    # # # mmpretrain
+    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
+    # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
+    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # # mmdetection
+    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+    # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
+    # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
+    # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
+    # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
+    # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
+    # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
+    # # mmpose
+    # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
+    # # mmaction2
+    # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
+    # # mmocr
+    # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
+    # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
+    # # mmsegmentation
+    # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
+    # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
+    # # mmyolo
+    # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
+    # # mmdetection3d
+    # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
+    # # DI-engine
+    # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
+    # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
     # mmagic
     # - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable_diffusion"
 
@@ -103,8 +103,8 @@ ascend:
     # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
     #   fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable"
     # mmpretrain
-    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
 
 kunlunxin:
     # mmpretrain
-    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"

From cab07d8e74cbc314d5502966b20c7f72c8e6056e Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Mon, 22 Jul 2024 17:07:35 +0800
Subject: [PATCH 02/12] fix conv2d memory format bug

---
 dipu/third_party/DIOPI | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI
index f33727c25..7508ef066 160000
--- a/dipu/third_party/DIOPI
+++ b/dipu/third_party/DIOPI
@@ -1 +1 @@
-Subproject commit f33727c25dd4811932afa42200bbb956fe1e3e2c
+Subproject commit 7508ef0665e1ad967b94d5efe0ef24ffc8f01fe4

From fa445a88d1f476e795501837f110ed0d006de5d9 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Mon, 22 Jul 2024 18:46:43 +0800
Subject: [PATCH 03/12] fix ascend memory format config on conv2d

---
 dipu/third_party/DIOPI | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI
index 7508ef066..2e14e2cf9 160000
--- a/dipu/third_party/DIOPI
+++ b/dipu/third_party/DIOPI
@@ -1 +1 @@
-Subproject commit 7508ef0665e1ad967b94d5efe0ef24ffc8f01fe4
+Subproject commit 2e14e2cf99530976338874728facbe2021537c93

From 3c045e2d079cd77335e0b1003d012e1b2028492b Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Mon, 22 Jul 2024 18:57:06 +0800
Subject: [PATCH 04/12] another fix for ascend memory format config on conv2d

---
 dipu/third_party/DIOPI | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI
index 2e14e2cf9..55b93f272 160000
--- a/dipu/third_party/DIOPI
+++ b/dipu/third_party/DIOPI
@@ -1 +1 @@
-Subproject commit 2e14e2cf99530976338874728facbe2021537c93
+Subproject commit 55b93f272d430e918c0ff780a6b885a584b60921

From 6fb9fd279971d2b8bc82ec35655d0348e36c1f3e Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Mon, 22 Jul 2024 20:14:26 +0800
Subject: [PATCH 05/12] revert to an older DIOPI commit

---
 dipu/third_party/DIOPI | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI
index 55b93f272..97342d4f9 160000
--- a/dipu/third_party/DIOPI
+++ b/dipu/third_party/DIOPI
@@ -1 +1 @@
-Subproject commit 55b93f272d430e918c0ff780a6b885a584b60921
+Subproject commit 97342d4f9c6bf6f821c7726fbee893e7645d4e80

From 05c2a470b30456f067eff9516bf0fb138508bbc0 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Tue, 23 Jul 2024 12:19:41 +0800
Subject: [PATCH 06/12] update some one-iter-test models

---
 dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index fcb32782b..bbe574948 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -104,6 +104,9 @@ ascend:
     #   fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable"
     # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    # mmdetection
+    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
 
 kunlunxin:
     # mmpretrain

From 7f84a56e0ed918bffa7c88dfbcb0bbc22f0c62d6 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Tue, 23 Jul 2024 13:29:29 +0800
Subject: [PATCH 07/12] revert model to check CI bug

---
 dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index bbe574948..dcdd7e226 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -105,8 +105,8 @@ ascend:
     # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
     # mmdetection
-    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
 
 kunlunxin:
     # mmpretrain

From 0b0649790665134487194bac28e415e638cc3828 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Wed, 24 Jul 2024 15:09:37 +0800
Subject: [PATCH 08/12] add mobilenet v2

---
 dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index dcdd7e226..ce4bd473d 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -6,7 +6,7 @@ camb:
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
     # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
     # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
-    # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
+    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
     #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
     # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
     # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
@@ -104,6 +104,7 @@ ascend:
     #   fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable"
     # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
     # mmdetection
     # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
     # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"

From c9ad988109c7fca889ff2fd2c56b2c9a25d2497a Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Thu, 25 Jul 2024 19:03:56 +0800
Subject: [PATCH 09/12] update branch to latest

---
 .../diopi_functions.yaml                      |  21 +++
 .../test_one_iter_traditional_model_list.yaml | 166 +++++++++---------
 dipu/third_party/DIOPI                        |   2 +-
 .../csrc_dipu/aten/ops/DIPUCopy.hpp           |  14 +-
 .../csrc_dipu/diopirt/diopirt_impl.cpp        |   6 +
 .../csrc_dipu/vendor/cuda/CUDACopyInplace.cpp |  56 ++++--
 .../vendor/droplet/communicatorimpl.cpp       |  92 ++++++++--
 .../csrc_dipu/vendor/droplet/vendorapi.h      |   6 +-
 dipu/torch_dipu/dipu/dataloader.py            |  10 +-
 9 files changed, 253 insertions(+), 120 deletions(-)

diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml
index 2308ba7a3..cba2a4a7d 100755
--- a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml
+++ b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml
@@ -2633,6 +2633,27 @@
     return;
   interface: diopiMulInpScalar(ctx, self, other, alpha)
 
+- schema: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
+  dummy_call_diopi: True
+  custom_code_at_the_beginning: |
+    auto selfVec = self.vec();
+    for (size_t i = 0;i < self.size();i++) {
+      dipu_mul__tensor(selfVec[i], other);
+    }
+    return;
+  interface: diopiMulInp(ctx, self, other, alpha)
+
+- schema: _foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[]
+  dummy_call_diopi: True
+  custom_code_at_the_beginning: |
+    std::vector<at::Tensor> out(self.size());
+    for (size_t i = 0;i < self.size();i++) {
+      out[i] = nodispatch::empty_like(self[i]);
+      dipu_mul_out(self[i], other, out[i]);
+    }
+    return out;
+  interface: diopiMul(ctx, out, self, other)
+
 - schema: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   dummy_call_diopi: True
   custom_code_at_the_beginning: |
diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index ce4bd473d..39d29d1b9 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -1,114 +1,116 @@
 camb:
     # # transformers
-    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
 
     # # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
     - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
-    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
-    # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
-    # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
+    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
 
     # # mmdetection
-    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
-    # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
-    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
+    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+    - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
+    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
     # model problem
     # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
-    # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
-    #   precision: {atol: 0.025, metric: 0.02, rtol: 0.02}
-    # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
-    # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
+    - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
+      precision: {atol: 0.025, metric: 0.02, rtol: 0.02}
+    - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
+    - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
 
     # # mmdetection3d
-    # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
+    - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
 
     # # mmsegmentation
-    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
-    # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
-    # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
+    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
+    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
 
     # # mmpose
-    # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
+    - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
 
-    # # # # mmaction2
-    # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
+    # # # mmaction2
+    - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
 
-    # # # # mmocr
-    # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
-    # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
+    # # # mmocr
+    - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
+    - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
 
-    # # # mmyolo
-    # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
+    # # mmyolo
+    - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
 
-    # # # DI-engine
-    # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
-    # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
+    # # DI-engine
+    - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
+    - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
 
 cuda:
     # transformers
-    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
-    # # # mmpretrain
-    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
-    # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
-    # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
-    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
-    # # mmdetection
-    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
-    # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
-    # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
-    # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
-    # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
-    # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
-    # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
-    # # mmpose
-    # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
-    # # mmaction2
-    # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
-    # # mmocr
-    # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
-    # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
-    # # mmsegmentation
-    # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
-    # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
-    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
-    # # mmyolo
-    # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
-    # # mmdetection3d
-    # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
-    # # DI-engine
-    # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
-    # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
+    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    # # mmpretrain
+    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
+    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
+    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # mmdetection
+    - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
+    - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+    - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd"
+    - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos"
+    - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet"
+    - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn"
+    - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn"
+    - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead"
+    # mmpose
+    - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet"
+    # mmaction2
+    # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn"
+    # mmocr
+    - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn"
+    - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet"
+    # mmsegmentation
+    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
+    - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet"
+    # mmyolo
+    - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast"
+    # mmdetection3d
+    - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars"
+    # DI-engine
+    - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo"
+    - model_cfg: "DI-engine ding/example/sac.py workdirs_sac"
     # mmagic
     # - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable_diffusion"
 
 ascend:
-    # mmsegmentation
-    # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
-    #   fallback_op_list: "nll_loss2d_forward,nll_loss2d_backward,native_batch_norm,topk,convolution_overrideable,convolution_backward_overrideable,native_batch_norm_backward"
-    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    #   fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable"
     # mmpretrain
-    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
     - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2"
-    # mmdetection
-    # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr"
-    # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"
+      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
+    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+
+    # mmsegmentation
+    - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3"
+    - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus"
+    - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet"
 
 kunlunxin:
     # mmpretrain
-    # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
+    - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI
index 97342d4f9..abb56e204 160000
--- a/dipu/third_party/DIOPI
+++ b/dipu/third_party/DIOPI
@@ -1 +1 @@
-Subproject commit 97342d4f9c6bf6f821c7726fbee893e7645d4e80
+Subproject commit abb56e204aa379bddd7190846215e3fda0171c15
diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp
index fe626480f..5d4237287 100644
--- a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp
+++ b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp
@@ -198,17 +198,25 @@ class CopyParamsInfo {
   }
 
   explicit CopyParamsInfo(const at::Tensor& dst, const at::Tensor& src,
-                          const DIPUStream& curStream) {
-    // assume layout always = not suppport Sparse layout
+                          const DIPUStream& curStream)
+      : curStream_(curStream) {
     TORCH_CHECK(dst.options().layout() == c10::Layout::Strided,
                 "only Strided layout is supported");
     copyType_ = getCopyType(dst, src);
-    curStream_ = curStream;
 
     recomputeTensorsInfo(dst, src);
   }
+  explicit CopyParamsInfo(const at::Tensor& dst, const at::Tensor& src) {
+    TORCH_CHECK(dst.options().layout() == c10::Layout::Strided,
+                "only Strided layout is supported");
+    copyType_ = getCopyType(dst, src);
 
+    recomputeTensorsInfo(dst, src);
+  }
   void updateCopyType(DIPUCopyType copyType) { copyType_ = copyType; }
+  void updateCurrentStream(const DIPUStream& curStream) {
+    curStream_ = curStream;
+  }
 };
 
 inline void doSrcStreamWaitDstStream(const CopyParamsInfo& info,
diff --git a/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp b/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp
index bd9b30dd2..d52696a09 100644
--- a/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp
+++ b/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp
@@ -7,6 +7,7 @@
 
 #include "csrc_dipu/aten/ops/NodispatchUtils.hpp"
 #include "csrc_dipu/profiler/profiler.h"
+#include "csrc_dipu/runtime/devproxy/deviceproxy.h"
 
 namespace diopihelper = dipu::diopi_helper;
 using dipu::profile::RecordBlockCreator;
@@ -113,6 +114,11 @@ DIOPI_RT_API diopiError_t diopiGetTensorDeviceIndex(
   return diopiSuccess;
 }
 
+DIOPI_RT_API diopiError_t
+diopiGetCurrentDeviceIndex(diopiDeviceIndex_t* pDevIndex) {
+  *pDevIndex = dipu::devproxy::current_device();
+  return diopiSuccess;
+}
 DIOPI_RT_API diopiError_t diopiGetStream(diopiContextHandle_t ctx,
                                          diopiStreamHandle_t* stream) {
   *stream = ctx->stream;
diff --git a/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp b/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp
index fabac2908..1c18c8bda 100644
--- a/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp
+++ b/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp
@@ -15,25 +15,45 @@ class CUDACopyInplace : public DIPUCopyInpOnDIOPI {
   CUDACopyInplace() = default;
   ~CUDACopyInplace() override = default;
 
-  // diopi-cuda copy use aten, so it can handle between-device case.
-  void copyNodirectBetweenDevices(at::Tensor& dst, const at::Tensor& src,
-                                  bool non_blocking,
-                                  CopyParamsInfo& info) override {
-    dipu_wrap_diopi_copy_inp(dst, src, non_blocking);
-  }
+  void run(at::Tensor& dst, const at::Tensor& src, bool non_blocking) override {
+    TORCH_CHECK(dst.defined(), "dst is undefined");
+    TORCH_CHECK(src.defined(), "src is undefined");
+    if (dst.numel() == 0 || dst.is_same(src)) {
+      return;
+    }
+    auto info = CopyParamsInfo(dst, src);
+    if (info.copyType_ == DIPUCopyType::D2Self) {
+      non_blocking = true;
+    }
+
+    // Exit early if dst and src are views of the same data
+    if ((dst.is_alias_of(src) && dst.storage_offset() == src.storage_offset() &&
+         info.sameStride_ && info.sameDtype_)) {
+      return;
+    }
+
+    if (native::dumpOpArgLevel() > 1) {
+      std::cout << "    DIPUCopyInplace.run:    dst:" << native::dumpArg(dst)
+                << std::endl;
+      std::cout << "    DIPUCopyInplace.run::   src:" << native::dumpArg(src)
+                << std::endl;
+    }
 
- protected:
-  void copyPostProcess(const at::Tensor& dst, const at::Tensor& src,
-                       bool non_blocking, const CopyParamsInfo& info,
-                       DIPUStream& curStream) override {
-    // 1. block_cpu_d2d=False on cuda, because we do not need sync stream when
-    // copy on two devices, just wait between stream
-    // 2. block_cpu_h2d=False on cuda, We do not need sync stream if cpu tensor
-    // is not pin memory which stay consistent with
-    // aten/src/ATen/native/cuda/Copy.cu.
-    tryRecordOrSyncStream(info, dst, src, curStream, non_blocking,
-                          /* block_cpu_d2d = */ false,
-                          /* block_cpu_h2d = */ false);
+    switch (info.copyType_) {
+      case DIPUCopyType::D2Self:
+      case DIPUCopyType::D2OtherD:
+        dipu_wrap_diopi_copy_inp(dst, src, non_blocking);
+        break;
+      default: {
+        const DIPUGuard guard((!src.is_cpu()) ? src.device() : dst.device());
+        auto curStream = dipu::getCurrentDIPUStream();
+        info.updateCurrentStream(curStream);
+        copyAll(dst, src, non_blocking, info);
+        tryRecordOrSyncStream(info, dst, src, curStream, non_blocking,
+                              /* block_cpu_d2d = */ false,
+                              /* block_cpu_h2d = */ false);
+      }
+    }
   }
 };
 
diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp
index f24ea7db5..d23cd0936 100644
--- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp
+++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp
@@ -1,7 +1,11 @@
 #include <stdexcept>
 #include <string>
+#include <type_traits>
 
 #include <c10/core/ScalarType.h>
+
+#include "csrc_dipu/runtime/device/basedef.h"
+#include "csrc_dipu/runtime/devproxy/deviceproxy.h"
 #ifdef USE_PCCL
 #include <pccl.h>
 #endif  // USE_PCCL
@@ -176,68 +180,151 @@ DIPU_API diclResult_t diclRecv(void* recvbuff, size_t count,
 
 #else  // USE_PCCL
 
+// Fallback used when PCCL is not compiled in: a communicator can only span a
+// single rank, so the supported collectives reduce to a device-to-device copy
+// from the send buffer to the receive buffer.
+namespace {
+
+using diclCommValue_t = std::remove_pointer_t<diclComm_t>;
+// Sentinel stored in every communicator created here; used to reject handles
+// that did not come from createDiclComm().
+constexpr diclCommValue_t kMagicComm = 0x5043434C;  // "PCCL"
+
+diclComm_t createDiclComm() { return new diclCommValue_t(kMagicComm); }
+
+void destroyDiclComm(diclComm_t comm) { delete comm; }
+
+// Throws std::runtime_error unless comm points at the kMagicComm sentinel.
+void checkCommOrThrow(diclComm_t comm) {
+  if (comm == nullptr || *comm != kMagicComm) {
+    throw std::runtime_error("Invalid comm.");
+  }
+}
+
+[[noreturn]] void throwNotSupportedError() {
+  throw std::runtime_error(
+      "PCCL is not enabled. DIPU only allows single GPU communication.");
+}
+
+// The only valid world size without PCCL is 1.
+void checkNrankOrThrow(int nranks) {
+  if (nranks != 1) {
+    throwNotSupportedError();
+  }
+}
+
+// The only valid rank (and therefore root) without PCCL is 0.
+void checkRankOrThrow(int rank) {
+  if (rank != 0) {
+    throwNotSupportedError();
+  }
+}
+
+// Enqueues a copy of nbytes from src to dst on the current device via
+// devproxy::memCopyD2DAsync.
+void singleDeviceMemcpy(deviceStream_t stream, void* dst, const void* src,
+                        size_t nbytes) {
+  // Nothing to move for an empty or in-place (dst == src) request; skip the
+  // call rather than issuing a copy whose source and destination coincide.
+  if (nbytes == 0 || dst == src) {
+    return;
+  }
+  auto device = devproxy::current_device();
+  devproxy::memCopyD2DAsync(stream, nbytes, device, dst, device, src);
+}
+
+}  // namespace
+
 const int DICL_UNIQUE_ID_BYTES_SIZE = 0;
 
 DIPU_API diclResult_t diclGetCommAsyncError(diclComm_t comm) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  return DICL_SUCCESS;
 }
 
-DIPU_API diclResult_t diclGetUniqueId(pcclUniqueId* uniqueId) {
-  return DICL_ERR_UNDEF;
+DIPU_API diclResult_t diclGetUniqueId(commUniqueId* uniqueId) {
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclCommInitRank(diclComm_t* comm, int nranks,
-                                       pcclUniqueId uniqueId, int rank,
+                                       commUniqueId uniqueId, int rank,
                                        int localDeviceId) {
-  return DICL_ERR_UNDEF;
+  checkNrankOrThrow(nranks);
+  checkRankOrThrow(rank);
+  DIPU_LOGW(
+      "PCCL is not enabled. DIPU will simulate single GPU "
+      "communication using memcpy.");
+  *comm = createDiclComm();
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclCommDestroy(diclComm_t comm) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  destroyDiclComm(comm);
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclAllReduce(const void* sendbuff, void* recvbuff,
                                     size_t count, at::ScalarType datatype,
                                     const ReduceOp& reduceOp, diclComm_t comm,
                                     deviceStream_t stream) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  singleDeviceMemcpy(stream, recvbuff, sendbuff,
+                     count * at::elementSize(datatype));
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclBroadcast(const void* sendbuff, void* recvbuff,
                                     size_t count, at::ScalarType datatype,
                                     int root, diclComm_t comm,
                                     deviceStream_t stream) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  checkRankOrThrow(root);  // rank 0 is the only possible root
+  singleDeviceMemcpy(stream, recvbuff, sendbuff,
+                     count * at::elementSize(datatype));
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclAllGather(const void* sendBuf, void* recvBuf,
                                     size_t count, at::ScalarType datatype,
                                     diclComm_t comm, deviceStream_t stream) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  singleDeviceMemcpy(stream, recvBuf, sendBuf,
+                     count * at::elementSize(datatype));
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclReduce(const void* sendbuff, void* recvbuff,
                                  size_t count, at::ScalarType datatype,
                                  const ReduceOp& reduceOp, int root,
                                  diclComm_t comm, deviceStream_t stream) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  checkRankOrThrow(root);
+  singleDeviceMemcpy(stream, recvbuff, sendbuff,
+                     count * at::elementSize(datatype));
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclReduceScatter(
     void* sendBuf, void* recvBuf, size_t recvCount, at::ScalarType datatype,
     const ReduceOp& reduceOp, diclComm_t comm, deviceStream_t stream) {
-  return DICL_ERR_UNDEF;
+  checkCommOrThrow(comm);
+  singleDeviceMemcpy(stream, recvBuf, sendBuf,
+                     recvCount * at::elementSize(datatype));
+  return DICL_SUCCESS;
 }
 
 DIPU_API diclResult_t diclSend(const void* sendbuff, size_t count,
                                at::ScalarType datatype, int peer,
                                diclComm_t comm, deviceStream_t stream) {
+  throwNotSupportedError();
   return DICL_ERR_UNDEF;
 }
 
 DIPU_API diclResult_t diclRecv(void* recvbuff, size_t count,
                                at::ScalarType datatype, int peer,
                                diclComm_t comm, deviceStream_t stream) {
+  throwNotSupportedError();
   return DICL_ERR_UNDEF;
 }
 
diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h
index 22ff9670b..91ee934f0 100644
--- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h
+++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h
@@ -30,10 +30,8 @@ using deviceHandle_t = tangContext_t*;
 using diclComm_t = pcclComm_t;
 using commUniqueId = pcclUniqueId;
 #else   // USE_PCCL
-class pcclComm_t {};
-using diclComm_t = pcclComm_t*;
-class pcclUniqueId {};
-using commUniqueId = pcclUniqueId;
+using diclComm_t = uint32_t*;
+struct commUniqueId {};
 #endif  // USE_PCCL
 
 }  // namespace dipu
diff --git a/dipu/torch_dipu/dipu/dataloader.py b/dipu/torch_dipu/dipu/dataloader.py
index e4e812219..510a202dc 100644
--- a/dipu/torch_dipu/dipu/dataloader.py
+++ b/dipu/torch_dipu/dipu/dataloader.py
@@ -57,7 +57,15 @@ def __init__(
             )
             pin_memory = False
         elif pin_memory:
-            pin_memory_device = "cuda"
+            import os
+
+            mockcuda = (
+                False
+                if os.environ.get("DIPU_MOCK_CUDA", "True").lower() == "false"
+                else True
+            )
+            if mockcuda == True:
+                pin_memory_device = "cuda"
 
         super().__init__(
             dataset,

From 00cf16cbbb6649b08c91ee03d9ba013bb855babe Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Thu, 25 Jul 2024 20:48:21 +0800
Subject: [PATCH 10/12] change sh to bash to debug

---
 dipu/scripts/ci/ci_run_one_iter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dipu/scripts/ci/ci_run_one_iter.py b/dipu/scripts/ci/ci_run_one_iter.py
index e0111466b..2f2f68353 100644
--- a/dipu/scripts/ci/ci_run_one_iter.py
+++ b/dipu/scripts/ci/ci_run_one_iter.py
@@ -123,8 +123,8 @@ def process_one_iter(log_file, clear_log, model_info: dict) -> None:
             cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 python {train_path}"
             cmd_cp_one_iter = ""
         else:
-            cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
-            cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
+            cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
+            cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
     elif device == "ascend":
         if "infer" in p2 and "infer" in p3:
             cmd_run_one_iter = f"python {train_path}"

From ffefc1fb460cb47ae1f0bd2ae73f1217fddf9de9 Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Thu, 25 Jul 2024 22:28:30 +0800
Subject: [PATCH 11/12] test camb bash

---
 dipu/scripts/ci/ci_run_one_iter.py                   |  6 ++++--
 .../ci/test_one_iter_traditional_model_list.yaml     | 12 ++++++------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/dipu/scripts/ci/ci_run_one_iter.py b/dipu/scripts/ci/ci_run_one_iter.py
index 2f2f68353..ae012ee52 100644
--- a/dipu/scripts/ci/ci_run_one_iter.py
+++ b/dipu/scripts/ci/ci_run_one_iter.py
@@ -123,8 +123,10 @@ def process_one_iter(log_file, clear_log, model_info: dict) -> None:
             cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 python {train_path}"
             cmd_cp_one_iter = ""
         else:
-            cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
-            cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
+            cmd_run_one_iter = f"bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
+            cmd_cp_one_iter = f"bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
+            # cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}"
+            # cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition}  --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}"
     elif device == "ascend":
         if "infer" in p2 and "infer" in p3:
             cmd_run_one_iter = f"python {train_path}"
diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
index 39d29d1b9..63ea7f7e1 100644
--- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
+++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml
@@ -1,18 +1,18 @@
 camb:
     # # transformers
-    - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
+    # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert"
 
     # # mmpretrain
     - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet"
-    - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
-    - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
+    # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer"
+    # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer"
     - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory"
       precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
     - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3"
     - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet"
-    - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
-    - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
-      precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
+    # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext"
+    # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2"
+    #   precision: {atol: 0.015, metric: 0.015, rtol: 0.01}
 
     # # mmdetection
     - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3"

From f284df067b909cd90636cef7a6a835811b4bd9cd Mon Sep 17 00:00:00 2001
From: Lantian Zhang <1105976166@qq.com>
Date: Thu, 25 Jul 2024 22:30:13 +0800
Subject: [PATCH 12/12] restore original pin_memory_device handling

---
 dipu/torch_dipu/dipu/dataloader.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/dipu/torch_dipu/dipu/dataloader.py b/dipu/torch_dipu/dipu/dataloader.py
index 510a202dc..e4e812219 100644
--- a/dipu/torch_dipu/dipu/dataloader.py
+++ b/dipu/torch_dipu/dipu/dataloader.py
@@ -57,15 +57,7 @@ def __init__(
             )
             pin_memory = False
         elif pin_memory:
-            import os
-
-            mockcuda = (
-                False
-                if os.environ.get("DIPU_MOCK_CUDA", "True").lower() == "false"
-                else True
-            )
-            if mockcuda == True:
-                pin_memory_device = "cuda"
+            pin_memory_device = "cuda"
 
         super().__init__(
             dataset,