From 3a679ffe81e2192e47fb3407edea69f32ddf2b35 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Fri, 19 Jul 2024 17:02:51 +0800 Subject: [PATCH 01/12] change tests model list to debug ascend ci --- .../test_one_iter_traditional_model_list.yaml | 150 +++++++++--------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index 54585ffcc..fcb32782b 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -1,98 +1,98 @@ camb: # # transformers - - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" + # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" # # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" - - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" - - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" - precision: {atol: 0.015, metric: 0.015, rtol: 0.01} - - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" - - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" - - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" - precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" + # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" + # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" + # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" + # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" + # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} # # mmdetection - - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" - - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" - - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" + # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" + # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" # model problem # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" - - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" - precision: {atol: 0.025, metric: 0.02, rtol: 0.02} - - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" - - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" + # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" + # precision: {atol: 0.025, metric: 0.02, rtol: 0.02} + # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" + # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" # # mmdetection3d - - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" + # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" # # mmsegmentation - - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" - - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" - - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" - - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" + # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" + # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" + # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" + # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" # # mmpose - - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" + # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" - # # # mmaction2 - - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + # # # # mmaction2 + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" - # # # mmocr - - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" + # # # # mmocr + # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" + # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" - # # mmyolo - - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" + # # # mmyolo + # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" - # # DI-engine - - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + # # # DI-engine + # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" cuda: # transformers - - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" - # # mmpretrain - - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" - - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" - - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" - - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" - - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" - precision: {atol: 0.015, metric: 0.015, rtol: 0.01} - # mmdetection - - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" - - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" - - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" - - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" - - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" - - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" - - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" - # mmpose - - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" - # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" - # mmocr - - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" - # mmsegmentation - - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" - - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" - - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" - - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" - # mmyolo - - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" - # mmdetection3d - - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" - # DI-engine - - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" + # # # mmpretrain + # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" + # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" + # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" + # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" + # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" + # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + # # mmdetection + # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" + # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" + # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" + # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" + # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" + # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" + # # mmpose + # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" + # # mmaction2 + # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + # # mmocr + # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" + # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" + # # mmsegmentation + # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" + # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" + # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" + # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" + # # mmyolo + # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" + # # mmdetection3d + # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" + # # DI-engine + # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" # mmagic # - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable_diffusion" @@ -103,8 +103,8 @@ ascend: # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" # fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable" # mmpretrain - # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" kunlunxin: # mmpretrain - - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" From cab07d8e74cbc314d5502966b20c7f72c8e6056e Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Mon, 22 Jul 2024 17:07:35 +0800 Subject: [PATCH 02/12] fix conv2d memory format bug --- dipu/third_party/DIOPI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI index f33727c25..7508ef066 160000 --- a/dipu/third_party/DIOPI +++ b/dipu/third_party/DIOPI @@ -1 +1 @@ -Subproject commit f33727c25dd4811932afa42200bbb956fe1e3e2c +Subproject commit 7508ef0665e1ad967b94d5efe0ef24ffc8f01fe4 From fa445a88d1f476e795501837f110ed0d006de5d9 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Mon, 22 Jul 2024 18:46:43 +0800 Subject: [PATCH 03/12] fix ascend memory format config on conv2d --- dipu/third_party/DIOPI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI index 7508ef066..2e14e2cf9 160000 --- a/dipu/third_party/DIOPI +++ b/dipu/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 7508ef0665e1ad967b94d5efe0ef24ffc8f01fe4 +Subproject commit 2e14e2cf99530976338874728facbe2021537c93 From 3c045e2d079cd77335e0b1003d012e1b2028492b Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Mon, 22 Jul 2024 18:57:06 +0800 Subject: [PATCH 04/12] another fix ascend memory format config on conv2d --- dipu/third_party/DIOPI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI index 2e14e2cf9..55b93f272 160000 --- a/dipu/third_party/DIOPI +++ b/dipu/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 2e14e2cf99530976338874728facbe2021537c93 +Subproject commit 55b93f272d430e918c0ff780a6b885a584b60921 From 6fb9fd279971d2b8bc82ec35655d0348e36c1f3e Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Mon, 22 Jul 2024 20:14:26 +0800 Subject: [PATCH 05/12] revert to a older DIOPI commit --- dipu/third_party/DIOPI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI index 55b93f272..97342d4f9 160000 --- a/dipu/third_party/DIOPI +++ b/dipu/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 55b93f272d430e918c0ff780a6b885a584b60921 +Subproject commit 97342d4f9c6bf6f821c7726fbee893e7645d4e80 From 05c2a470b30456f067eff9516bf0fb138508bbc0 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Tue, 23 Jul 2024 12:19:41 +0800 Subject: [PATCH 06/12] update some one-iter-test model --- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index fcb32782b..bbe574948 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -104,6 +104,9 @@ ascend: # fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable" # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + # mmdetection + - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" kunlunxin: # mmpretrain From 7f84a56e0ed918bffa7c88dfbcb0bbc22f0c62d6 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Tue, 23 Jul 2024 13:29:29 +0800 Subject: [PATCH 07/12] revert model to check CI bug --- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index bbe574948..dcdd7e226 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -105,8 +105,8 @@ ascend: # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" # mmdetection - - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" kunlunxin: # mmpretrain From 0b0649790665134487194bac28e415e638cc3828 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Wed, 24 Jul 2024 15:09:37 +0800 Subject: [PATCH 08/12] add mobile v2 --- dipu/scripts/ci/test_one_iter_traditional_model_list.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index dcdd7e226..ce4bd473d 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -6,7 +6,7 @@ camb: - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" @@ -104,6 +104,7 @@ ascend: # fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable" # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" # mmdetection # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" From c9ad988109c7fca889ff2fd2c56b2c9a25d2497a Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Thu, 25 Jul 2024 19:03:56 +0800 Subject: [PATCH 09/12] update branch to latest --- .../diopi_functions.yaml | 21 +++ .../test_one_iter_traditional_model_list.yaml | 166 +++++++++--------- dipu/third_party/DIOPI | 2 +- .../csrc_dipu/aten/ops/DIPUCopy.hpp | 14 +- .../csrc_dipu/diopirt/diopirt_impl.cpp | 6 + .../csrc_dipu/vendor/cuda/CUDACopyInplace.cpp | 56 ++++-- .../vendor/droplet/communicatorimpl.cpp | 92 ++++++++-- .../csrc_dipu/vendor/droplet/vendorapi.h | 6 +- dipu/torch_dipu/dipu/dataloader.py | 10 +- 9 files changed, 253 insertions(+), 120 deletions(-) diff --git a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml index 2308ba7a3..cba2a4a7d 100755 --- a/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml +++ b/dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml @@ -2633,6 +2633,27 @@ return; interface: diopiMulInpScalar(ctx, self, other, alpha) +- schema: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> () + dummy_call_diopi: True + custom_code_at_the_beginning: | + auto selfVec = self.vec(); + for (size_t i = 0;i < self.size();i++) { + dipu_mul__tensor(selfVec[i], other); + } + return; + interface: diopiMulInp(ctx, self, other, alpha) + +- schema: _foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[] + dummy_call_diopi: True + custom_code_at_the_beginning: | + std::vector out(self.size()); + for (size_t i = 0;i < self.size();i++) { + out[i] = nodispatch::empty_like(self[i]); + dipu_mul_out(self[i], other, out[i]); + } + return out; + interface: diopiMul(ctx, out, self, other) + - schema: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> () dummy_call_diopi: True custom_code_at_the_beginning: | diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index ce4bd473d..39d29d1b9 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -1,114 +1,116 @@ camb: # # transformers - # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" + - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" # # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" - # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" - # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" + - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" - # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} - # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" - # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" - # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" - # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" + - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" + precision: {atol: 0.015, metric: 0.015, rtol: 0.01} # # mmdetection - # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" - # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" + - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" + - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" # model problem # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" - # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" - # precision: {atol: 0.025, metric: 0.02, rtol: 0.02} - # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" - # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" + - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" + precision: {atol: 0.025, metric: 0.02, rtol: 0.02} + - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" + - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" # # mmdetection3d - # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" + - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" # # mmsegmentation - # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" - # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" - # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" - # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" + - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" + - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" + - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" # # mmpose - # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" + - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" - # # # # mmaction2 - # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + # # # mmaction2 + - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" - # # # # mmocr - # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" + # # # mmocr + - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" + - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" - # # # mmyolo - # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" + # # mmyolo + - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" - # # # DI-engine - # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + # # DI-engine + - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" cuda: # transformers - # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" - # # # mmpretrain - # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" - # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" - # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - # - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - # - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" - # - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" - # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" - # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" - # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} - # # mmdetection - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" - # - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" - # - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" - # - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" - # - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" - # - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" - # - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" - # # mmpose - # - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" - # # mmaction2 - # # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" - # # mmocr - # - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" - # - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" - # # mmsegmentation - # - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" - # - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" - # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" - # - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" - # # mmyolo - # - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" - # # mmdetection3d - # - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" - # # DI-engine - # - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" - # - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" + - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" + # # mmpretrain + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" + - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" + - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" + - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" + precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + # mmdetection + - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" + - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + - model_cfg: "mmdetection ssd/ssd300_coco.py workdirs_ssd" + - model_cfg: "mmdetection fcos/fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco.py workdirs_fcos" + - model_cfg: "mmdetection retinanet/retinanet_r50_fpn_1x_coco.py workdirs_retinanet" + - model_cfg: "mmdetection mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py workdirs_mask_rcnn" + - model_cfg: "mmdetection faster_rcnn/faster-rcnn_r101_fpn_1x_coco.py workdirs_faster_rcnn" + - model_cfg: "mmdetection dyhead/atss_r50_fpn_dyhead_1x_coco.py workdirs_dyhead" + # mmpose + - model_cfg: "mmpose body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py workdirs_hrnet" + # mmaction2 + # - model_cfg: "mmaction2 recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py workdirs_tsn" + # mmocr + - model_cfg: "mmocr textrecog/crnn/crnn_mini-vgg_5e_mj.py workdirs_crnn" + - model_cfg: "mmocr textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py workdirs_dbnet" + # mmsegmentation + - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" + - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" + - model_cfg: "mmsegmentation pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_pspnet" + # mmyolo + - model_cfg: "mmyolo yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py workdirs_yolov5_fast" + # mmdetection3d + - model_cfg: "mmdetection3d pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py workdirs_pointpillars" + # DI-engine + - model_cfg: "DI-engine ding/example/ppo.py workdirs_ppo" + - model_cfg: "DI-engine ding/example/sac.py workdirs_sac" # mmagic # - model_cfg: "mmagic stable_diffusion/stable-diffusion_ddim_denoisingunet_infer.py workdirs_stable_diffusion" ascend: - # mmsegmentation - # - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" - # fallback_op_list: "nll_loss2d_forward,nll_loss2d_backward,native_batch_norm,topk,convolution_overrideable,convolution_backward_overrideable,native_batch_norm_backward" - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - # fallback_op_list: "fill_.Scalar,baddbmm.out,where.self,linear_backward,linear,uniform_,any.all_out,_foreach_addcdiv_.ScalarList,native_batch_norm_backward,convolution_overrideable" # mmpretrain - - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2" - # mmdetection - # - model_cfg: "mmdetection detr/detr_r50_8xb2-150e_coco.py workdirs_detr" - # - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" + precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + + # mmsegmentation + - model_cfg: "mmsegmentation deeplabv3/deeplabv3_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3" + - model_cfg: "mmsegmentation deeplabv3plus/deeplabv3plus_r50-d8_4xb2-40k_cityscapes-512x1024.py workdirs_deeplabv3plus" + - model_cfg: "mmsegmentation unet/unet-s5-d16_fcn_4xb4-160k_cityscapes-512x1024.py workdirs_unet" kunlunxin: # mmpretrain - # - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" + - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" diff --git a/dipu/third_party/DIOPI b/dipu/third_party/DIOPI index 97342d4f9..abb56e204 160000 --- a/dipu/third_party/DIOPI +++ b/dipu/third_party/DIOPI @@ -1 +1 @@ -Subproject commit 97342d4f9c6bf6f821c7726fbee893e7645d4e80 +Subproject commit abb56e204aa379bddd7190846215e3fda0171c15 diff --git a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp index fe626480f..5d4237287 100644 --- a/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp +++ b/dipu/torch_dipu/csrc_dipu/aten/ops/DIPUCopy.hpp @@ -198,17 +198,25 @@ class CopyParamsInfo { } explicit CopyParamsInfo(const at::Tensor& dst, const at::Tensor& src, - const DIPUStream& curStream) { - // assume layout always = not suppport Sparse layout + const DIPUStream& curStream) + : curStream_(curStream) { TORCH_CHECK(dst.options().layout() == c10::Layout::Strided, "only Strided layout is supported"); copyType_ = getCopyType(dst, src); - curStream_ = curStream; recomputeTensorsInfo(dst, src); } + explicit CopyParamsInfo(const at::Tensor& dst, const at::Tensor& src) { + TORCH_CHECK(dst.options().layout() == c10::Layout::Strided, + "only Strided layout is supported"); + copyType_ = getCopyType(dst, src); + recomputeTensorsInfo(dst, src); + } void updateCopyType(DIPUCopyType copyType) { copyType_ = copyType; } + void updateCurrentStream(const DIPUStream& curStream) { + curStream_ = curStream; + } }; inline void doSrcStreamWaitDstStream(const CopyParamsInfo& info, diff --git a/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp b/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp index bd9b30dd2..d52696a09 100644 --- a/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp +++ b/dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp @@ -7,6 +7,7 @@ #include "csrc_dipu/aten/ops/NodispatchUtils.hpp" #include "csrc_dipu/profiler/profiler.h" +#include "csrc_dipu/runtime/devproxy/deviceproxy.h" namespace diopihelper = dipu::diopi_helper; using dipu::profile::RecordBlockCreator; @@ -113,6 +114,11 @@ DIOPI_RT_API diopiError_t diopiGetTensorDeviceIndex( return diopiSuccess; } +DIOPI_RT_API diopiError_t +diopiGetCurrentDeviceIndex(diopiDeviceIndex_t* pDevIndex) { + *pDevIndex = dipu::devproxy::current_device(); + return diopiSuccess; +} DIOPI_RT_API diopiError_t diopiGetStream(diopiContextHandle_t ctx, diopiStreamHandle_t* stream) { *stream = ctx->stream; diff --git a/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp b/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp index fabac2908..1c18c8bda 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp +++ b/dipu/torch_dipu/csrc_dipu/vendor/cuda/CUDACopyInplace.cpp @@ -15,25 +15,45 @@ class CUDACopyInplace : public DIPUCopyInpOnDIOPI { CUDACopyInplace() = default; ~CUDACopyInplace() override = default; - // diopi-cuda copy use aten, so it can handle between-device case. - void copyNodirectBetweenDevices(at::Tensor& dst, const at::Tensor& src, - bool non_blocking, - CopyParamsInfo& info) override { - dipu_wrap_diopi_copy_inp(dst, src, non_blocking); - } + void run(at::Tensor& dst, const at::Tensor& src, bool non_blocking) override { + TORCH_CHECK(dst.defined(), "dst is undefined"); + TORCH_CHECK(src.defined(), "src is undefined"); + if (dst.numel() == 0 || dst.is_same(src)) { + return; + } + auto info = CopyParamsInfo(dst, src); + if (info.copyType_ == DIPUCopyType::D2Self) { + non_blocking = true; + } + + // Exit early if dst and src are views of the same data + if ((dst.is_alias_of(src) && dst.storage_offset() == src.storage_offset() && + info.sameStride_ && info.sameDtype_)) { + return; + } + + if (native::dumpOpArgLevel() > 1) { + std::cout << " DIPUCopyInplace.run: dst:" << native::dumpArg(dst) + << std::endl; + std::cout << " DIPUCopyInplace.run:: src:" << native::dumpArg(src) + << std::endl; + } - protected: - void copyPostProcess(const at::Tensor& dst, const at::Tensor& src, - bool non_blocking, const CopyParamsInfo& info, - DIPUStream& curStream) override { - // 1. block_cpu_d2d=False on cuda, because we do not need sync stream when - // copy on two devices, just wait between stream - // 2. block_cpu_h2d=False on cuda, We do not need sync stream if cpu tensor - // is not pin memory which stay consistent with - // aten/src/ATen/native/cuda/Copy.cu. - tryRecordOrSyncStream(info, dst, src, curStream, non_blocking, - /* block_cpu_d2d = */ false, - /* block_cpu_h2d = */ false); + switch (info.copyType_) { + case DIPUCopyType::D2Self: + case DIPUCopyType::D2OtherD: + dipu_wrap_diopi_copy_inp(dst, src, non_blocking); + break; + default: { + const DIPUGuard guard((!src.is_cpu()) ? src.device() : dst.device()); + auto curStream = dipu::getCurrentDIPUStream(); + info.updateCurrentStream(curStream); + copyAll(dst, src, non_blocking, info); + tryRecordOrSyncStream(info, dst, src, curStream, non_blocking, + /* block_cpu_d2d = */ false, + /* block_cpu_h2d = */ false); + } + } } }; diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp index f24ea7db5..d23cd0936 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/communicatorimpl.cpp @@ -1,7 +1,11 @@ #include #include +#include #include + +#include "csrc_dipu/runtime/device/basedef.h" +#include "csrc_dipu/runtime/devproxy/deviceproxy.h" #ifdef USE_PCCL #include #endif // USE_PCCL @@ -176,68 +180,134 @@ DIPU_API diclResult_t diclRecv(void* recvbuff, size_t count, #else // USE_PCCL +namespace { + +using diclCommValue_t = std::remove_pointer_t; +constexpr diclCommValue_t kMagicComm = 0x5043434C; // "PCCL" + +diclComm_t createDiclComm() { return new diclCommValue_t(kMagicComm); } + +void destroyDiclComm(diclComm_t comm) { delete comm; } + +void checkCommOrThrow(diclComm_t comm) { + if (comm == nullptr || *comm != kMagicComm) { + throw std::runtime_error("Invalid comm."); + } +} + +[[noreturn]] void throwNotSupportedError() { + throw std::runtime_error( + "PCCL is not enabled. DIPU only allows single GPU communication."); +} + +void checkNrankOrThrow(int nranks) { + if (nranks != 1) { + throwNotSupportedError(); + } +} + +void checkRankOrThrow(int rank) { + if (rank != 0) { + throwNotSupportedError(); + } +} + +void singleDeviceMemcpy(deviceStream_t stream, void* dst, const void* src, + size_t nbytes) { + auto device = devproxy::current_device(); + devproxy::memCopyD2DAsync(stream, nbytes, device, dst, device, src); +} + +} // namespace + const int DICL_UNIQUE_ID_BYTES_SIZE = 0; DIPU_API diclResult_t diclGetCommAsyncError(diclComm_t comm) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + return DICL_SUCCESS; } -DIPU_API diclResult_t diclGetUniqueId(pcclUniqueId* uniqueId) { - return DICL_ERR_UNDEF; +DIPU_API diclResult_t diclGetUniqueId(commUniqueId* uniqueId) { + return DICL_SUCCESS; } DIPU_API diclResult_t diclCommInitRank(diclComm_t* comm, int nranks, - pcclUniqueId uniqueId, int rank, + commUniqueId uniqueId, int rank, int localDeviceId) { - return DICL_ERR_UNDEF; + checkNrankOrThrow(nranks); + checkRankOrThrow(rank); + DIPU_LOGW( + "PCCL is not enabled. DIPU will simulate single GPU " + "communication using memcpy."); + *comm = createDiclComm(); + return DICL_SUCCESS; } DIPU_API diclResult_t diclCommDestroy(diclComm_t comm) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + destroyDiclComm(comm); + return DICL_SUCCESS; } DIPU_API diclResult_t diclAllReduce(const void* sendbuff, void* recvbuff, size_t count, at::ScalarType datatype, const ReduceOp& reduceOp, diclComm_t comm, deviceStream_t stream) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + singleDeviceMemcpy(stream, recvbuff, sendbuff, + count * at::elementSize(datatype)); + return DICL_SUCCESS; } DIPU_API diclResult_t diclBroadcast(const void* sendbuff, void* recvbuff, size_t count, at::ScalarType datatype, int root, diclComm_t comm, deviceStream_t stream) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + singleDeviceMemcpy(stream, recvbuff, sendbuff, + count * at::elementSize(datatype)); + return DICL_SUCCESS; } DIPU_API diclResult_t diclAllGather(const void* sendBuf, void* recvBuf, size_t count, at::ScalarType datatype, diclComm_t comm, deviceStream_t stream) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + singleDeviceMemcpy(stream, recvBuf, sendBuf, + count * at::elementSize(datatype)); + return DICL_SUCCESS; } DIPU_API diclResult_t diclReduce(const void* sendbuff, void* recvbuff, size_t count, at::ScalarType datatype, const ReduceOp& reduceOp, int root, diclComm_t comm, deviceStream_t stream) { - return DICL_ERR_UNDEF; + checkCommOrThrow(comm); + checkRankOrThrow(root); + singleDeviceMemcpy(stream, recvbuff, sendbuff, + count * at::elementSize(datatype)); + return DICL_SUCCESS; } DIPU_API diclResult_t diclReduceScatter( void* sendBuf, void* recvBuf, size_t recvCount, at::ScalarType datatype, const ReduceOp& reduceOp, diclComm_t comm, deviceStream_t stream) { - return DICL_ERR_UNDEF; + singleDeviceMemcpy(stream, recvBuf, sendBuf, + recvCount * at::elementSize(datatype)); + return DICL_SUCCESS; } DIPU_API diclResult_t diclSend(const void* sendbuff, size_t count, at::ScalarType datatype, int peer, diclComm_t comm, deviceStream_t stream) { + throwNotSupportedError(); return DICL_ERR_UNDEF; } DIPU_API diclResult_t diclRecv(void* recvbuff, size_t count, at::ScalarType datatype, int peer, diclComm_t comm, deviceStream_t stream) { + throwNotSupportedError(); return DICL_ERR_UNDEF; } diff --git a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h index 22ff9670b..91ee934f0 100644 --- a/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h +++ b/dipu/torch_dipu/csrc_dipu/vendor/droplet/vendorapi.h @@ -30,10 +30,8 @@ using deviceHandle_t = tangContext_t*; using diclComm_t = pcclComm_t; using commUniqueId = pcclUniqueId; #else // USE_PCCL -class pcclComm_t {}; -using diclComm_t = pcclComm_t*; -class pcclUniqueId {}; -using commUniqueId = pcclUniqueId; +using diclComm_t = uint32_t*; +struct commUniqueId {}; #endif // USE_PCCL } // namespace dipu diff --git a/dipu/torch_dipu/dipu/dataloader.py b/dipu/torch_dipu/dipu/dataloader.py index e4e812219..510a202dc 100644 --- a/dipu/torch_dipu/dipu/dataloader.py +++ b/dipu/torch_dipu/dipu/dataloader.py @@ -57,7 +57,15 @@ def __init__( ) pin_memory = False elif pin_memory: - pin_memory_device = "cuda" + import os + + mockcuda = ( + False + if os.environ.get("DIPU_MOCK_CUDA", "True").lower() == "false" + else True + ) + if mockcuda == True: + pin_memory_device = "cuda" super().__init__( dataset, From 00cf16cbbb6649b08c91ee03d9ba013bb855babe Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Thu, 25 Jul 2024 20:48:21 +0800 Subject: [PATCH 10/12] change sh to bash to debug --- dipu/scripts/ci/ci_run_one_iter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dipu/scripts/ci/ci_run_one_iter.py b/dipu/scripts/ci/ci_run_one_iter.py index e0111466b..2f2f68353 100644 --- a/dipu/scripts/ci/ci_run_one_iter.py +++ b/dipu/scripts/ci/ci_run_one_iter.py @@ -123,8 +123,8 @@ def process_one_iter(log_file, clear_log, model_info: dict) -> None: cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 python {train_path}" cmd_cp_one_iter = "" else: - cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 sh SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}" - cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=30 sh SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}" + cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}" + cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}" elif device == "ascend": if "infer" in p2 and "infer" in p3: cmd_run_one_iter = f"python {train_path}" From ffefc1fb460cb47ae1f0bd2ae73f1217fddf9de9 Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Thu, 25 Jul 2024 22:28:30 +0800 Subject: [PATCH 11/12] test camb bash --- dipu/scripts/ci/ci_run_one_iter.py | 6 ++++-- .../ci/test_one_iter_traditional_model_list.yaml | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/dipu/scripts/ci/ci_run_one_iter.py b/dipu/scripts/ci/ci_run_one_iter.py index 2f2f68353..ae012ee52 100644 --- a/dipu/scripts/ci/ci_run_one_iter.py +++ b/dipu/scripts/ci/ci_run_one_iter.py @@ -123,8 +123,10 @@ def process_one_iter(log_file, clear_log, model_info: dict) -> None: cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 python {train_path}" cmd_cp_one_iter = "" else: - cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}" - cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}" + cmd_run_one_iter = f"bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}" + cmd_cp_one_iter = f"bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}" + # cmd_run_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=40 bash SMART/tools/one_iter_tool/run_one_iter.sh {train_path} {config_path} {work_dir} {opt_arg}" + # cmd_cp_one_iter = f"srun --job-name={job_name} --partition={partition} --gres={gpu_requests} --time=30 bash SMART/tools/one_iter_tool/compare_one_iter.sh {package_name} {atol} {rtol} {metric}" elif device == "ascend": if "infer" in p2 and "infer" in p3: cmd_run_one_iter = f"python {train_path}" diff --git a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml index 39d29d1b9..63ea7f7e1 100644 --- a/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml +++ b/dipu/scripts/ci/test_one_iter_traditional_model_list.yaml @@ -1,18 +1,18 @@ camb: # # transformers - - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" + # - model_cfg: "transformers examples/pytorch/question-answering/run_bert_qa.py workdirs_bert" # # mmpretrain - model_cfg: "mmpretrain resnet/resnet50_8xb32_in1k.py workdirs_resnet" - - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" - - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" + # - model_cfg: "mmpretrain swin_transformer/swin-base_16xb64_in1k.py workdirs_swin_transformer" + # - model_cfg: "mmpretrain vision_transformer/vit-base-p16_32xb128-mae_in1k.py workdirs_vision_transformer" - model_cfg: "mmpretrain mobilenet_v2/mobilenet-v2_8xb32_in1k.py workdirs_mobilenetv2 --no-pin-memory" precision: {atol: 0.015, metric: 0.015, rtol: 0.01} - model_cfg: "mmpretrain mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py workdirs_mobilenetv3" - model_cfg: "mmpretrain efficientnet/efficientnet-b2_8xb32_in1k.py workdirs_efficientnet" - - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" - - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" - precision: {atol: 0.015, metric: 0.015, rtol: 0.01} + # - model_cfg: "mmpretrain convnext/convnext-small_32xb128_in1k.py workdirs_convnext" + # - model_cfg: "mmpretrain shufflenet_v2/shufflenet-v2-1x_16xb64_in1k_256.py workdirs_shufflenetv2" + # precision: {atol: 0.015, metric: 0.015, rtol: 0.01} # # mmdetection - model_cfg: "mmdetection yolo/yolov3_d53_8xb8-320-273e_coco.py workdirs_yolov3" From f284df067b909cd90636cef7a6a835811b4bd9cd Mon Sep 17 00:00:00 2001 From: Lantian Zhang <1105976166@qq.com> Date: Thu, 25 Jul 2024 22:30:13 +0800 Subject: [PATCH 12/12] remain pin memory to original --- dipu/torch_dipu/dipu/dataloader.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/dipu/torch_dipu/dipu/dataloader.py b/dipu/torch_dipu/dipu/dataloader.py index 510a202dc..e4e812219 100644 --- a/dipu/torch_dipu/dipu/dataloader.py +++ b/dipu/torch_dipu/dipu/dataloader.py @@ -57,15 +57,7 @@ def __init__( ) pin_memory = False elif pin_memory: - import os - - mockcuda = ( - False - if os.environ.get("DIPU_MOCK_CUDA", "True").lower() == "false" - else True - ) - if mockcuda == True: - pin_memory_device = "cuda" + pin_memory_device = "cuda" super().__init__( dataset,