|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: igc_opt --igc-vectorizer -S -dce < %s 2>&1 | FileCheck %s |
| 3 | +; Expects the eight scalar float phis and the insertelement chain in bb123 to be replaced by one <8 x float> phi fed by zeroinitializer (from bb43) and the second dpas result (from bb88); bb88 itself is expected unchanged. |
| 4 | +define spir_kernel void @quux() { |
| 5 | +; CHECK-LABEL: @quux( |
| 6 | +; CHECK-NEXT: bb43: |
| 7 | +; CHECK-NEXT: br label [[BB123:%.*]] |
| 8 | +; CHECK: bb60: |
| 9 | +; CHECK-NEXT: br label [[BB88:%.*]] |
| 10 | +; CHECK: bb88: |
| 11 | +; CHECK-NEXT: [[TMP90:%.*]] = phi float [ 0.000000e+00, [[BB60:%.*]] ], [ [[TMP114:%.*]], [[BB88]] ] |
| 12 | +; CHECK-NEXT: [[TMP91:%.*]] = phi float [ -0.000000e+00, [[BB60]] ], [ [[TMP115:%.*]], [[BB88]] ] |
| 13 | +; CHECK-NEXT: [[TMP92:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP116:%.*]], [[BB88]] ] |
| 14 | +; CHECK-NEXT: [[TMP93:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP117:%.*]], [[BB88]] ] |
| 15 | +; CHECK-NEXT: [[TMP94:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP118:%.*]], [[BB88]] ] |
| 16 | +; CHECK-NEXT: [[TMP95:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP119:%.*]], [[BB88]] ] |
| 17 | +; CHECK-NEXT: [[TMP96:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP120:%.*]], [[BB88]] ] |
| 18 | +; CHECK-NEXT: [[TMP97:%.*]] = phi float [ 0.000000e+00, [[BB60]] ], [ [[TMP121:%.*]], [[BB88]] ] |
| 19 | +; CHECK-NEXT: [[TMP104:%.*]] = insertelement <8 x float> zeroinitializer, float [[TMP90]], i64 0 |
| 20 | +; CHECK-NEXT: [[TMP105:%.*]] = insertelement <8 x float> [[TMP104]], float [[TMP91]], i64 1 |
| 21 | +; CHECK-NEXT: [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[TMP92]], i64 2 |
| 22 | +; CHECK-NEXT: [[TMP107:%.*]] = insertelement <8 x float> [[TMP106]], float [[TMP93]], i64 3 |
| 23 | +; CHECK-NEXT: [[TMP108:%.*]] = insertelement <8 x float> [[TMP107]], float [[TMP94]], i64 4 |
| 24 | +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <8 x float> [[TMP108]], float [[TMP95]], i64 5 |
| 25 | +; CHECK-NEXT: [[TMP110:%.*]] = insertelement <8 x float> [[TMP109]], float [[TMP96]], i64 6 |
| 26 | +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <8 x float> [[TMP110]], float [[TMP97]], i64 7 |
| 27 | +; CHECK-NEXT: [[TMP112:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> [[TMP111]], <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
| 28 | +; CHECK-NEXT: [[TMP113:%.*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
| 29 | +; CHECK-NEXT: [[TMP114]] = extractelement <8 x float> [[TMP113]], i64 0 |
| 30 | +; CHECK-NEXT: [[TMP115]] = extractelement <8 x float> [[TMP113]], i64 1 |
| 31 | +; CHECK-NEXT: [[TMP116]] = extractelement <8 x float> [[TMP113]], i64 2 |
| 32 | +; CHECK-NEXT: [[TMP117]] = extractelement <8 x float> [[TMP113]], i64 3 |
| 33 | +; CHECK-NEXT: [[TMP118]] = extractelement <8 x float> [[TMP113]], i64 4 |
| 34 | +; CHECK-NEXT: [[TMP119]] = extractelement <8 x float> [[TMP113]], i64 5 |
| 35 | +; CHECK-NEXT: [[TMP120]] = extractelement <8 x float> [[TMP113]], i64 6 |
| 36 | +; CHECK-NEXT: [[TMP121]] = extractelement <8 x float> [[TMP113]], i64 7 |
| 37 | +; CHECK-NEXT: br i1 false, label [[BB88]], label [[BB123]] |
| 38 | +; CHECK: bb123: |
| 39 | +; CHECK-NEXT: [[VECTORIZED_PHI:%.*]] = phi <8 x float> [ zeroinitializer, [[BB43:%.*]] ], [ [[TMP113]], [[BB88]] ] |
| 40 | +; CHECK-NEXT: [[TMP151:%.*]] = bitcast <8 x float> [[VECTORIZED_PHI]] to <8 x i32> |
| 41 | +; CHECK-NEXT: call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> [[TMP151]]) |
| 42 | +; CHECK-NEXT: ret void |
| 43 | +; |
| 44 | +bb43: |
| 45 | + br label %bb123 |
| 46 | + |
| 47 | +bb60: ; No predecessors! bb43 branches straight to bb123, so bb60 and the self-looping bb88 are never reached from the entry block. |
| 48 | + br label %bb88 |
| 49 | + |
| 50 | +bb88: ; preds = %bb88, %bb60 |
| 51 | + %tmp90 = phi float [ 0.000000e+00, %bb60 ], [ %tmp114, %bb88 ] |
| 52 | + %tmp91 = phi float [ -0.000000e+00, %bb60 ], [ %tmp115, %bb88 ] |
| 53 | + %tmp92 = phi float [ 0.000000e+00, %bb60 ], [ %tmp116, %bb88 ] |
| 54 | + %tmp93 = phi float [ 0.000000e+00, %bb60 ], [ %tmp117, %bb88 ] |
| 55 | + %tmp94 = phi float [ 0.000000e+00, %bb60 ], [ %tmp118, %bb88 ] |
| 56 | + %tmp95 = phi float [ 0.000000e+00, %bb60 ], [ %tmp119, %bb88 ] |
| 57 | + %tmp96 = phi float [ 0.000000e+00, %bb60 ], [ %tmp120, %bb88 ] |
| 58 | + %tmp97 = phi float [ 0.000000e+00, %bb60 ], [ %tmp121, %bb88 ] |
| 59 | + %tmp104 = insertelement <8 x float> zeroinitializer, float %tmp90, i64 0 |
| 60 | + %tmp105 = insertelement <8 x float> %tmp104, float %tmp91, i64 1 |
| 61 | + %tmp106 = insertelement <8 x float> %tmp105, float %tmp92, i64 2 |
| 62 | + %tmp107 = insertelement <8 x float> %tmp106, float %tmp93, i64 3 |
| 63 | + %tmp108 = insertelement <8 x float> %tmp107, float %tmp94, i64 4 |
| 64 | + %tmp109 = insertelement <8 x float> %tmp108, float %tmp95, i64 5 |
| 65 | + %tmp110 = insertelement <8 x float> %tmp109, float %tmp96, i64 6 |
| 66 | + %tmp111 = insertelement <8 x float> %tmp110, float %tmp97, i64 7 |
| 67 | + %tmp112 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %tmp111, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
| 68 | + %tmp113 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) |
| 69 | + %tmp114 = extractelement <8 x float> %tmp113, i64 0 |
| 70 | + %tmp115 = extractelement <8 x float> %tmp113, i64 1 |
| 71 | + %tmp116 = extractelement <8 x float> %tmp113, i64 2 |
| 72 | + %tmp117 = extractelement <8 x float> %tmp113, i64 3 |
| 73 | + %tmp118 = extractelement <8 x float> %tmp113, i64 4 |
| 74 | + %tmp119 = extractelement <8 x float> %tmp113, i64 5 |
| 75 | + %tmp120 = extractelement <8 x float> %tmp113, i64 6 |
| 76 | + %tmp121 = extractelement <8 x float> %tmp113, i64 7 |
| 77 | + br i1 false, label %bb88, label %bb123 |
| 78 | + |
| 79 | +bb123: ; preds = %bb88, %bb43 |
| 80 | + %tmp133 = phi float [ 0.000000e+00, %bb43 ], [ %tmp114, %bb88 ] |
| 81 | + %tmp134 = phi float [ 0.000000e+00, %bb43 ], [ %tmp115, %bb88 ] |
| 82 | + %tmp135 = phi float [ 0.000000e+00, %bb43 ], [ %tmp116, %bb88 ] |
| 83 | + %tmp136 = phi float [ 0.000000e+00, %bb43 ], [ %tmp117, %bb88 ] |
| 84 | + %tmp137 = phi float [ 0.000000e+00, %bb43 ], [ %tmp118, %bb88 ] |
| 85 | + %tmp138 = phi float [ 0.000000e+00, %bb43 ], [ %tmp119, %bb88 ] |
| 86 | + %tmp139 = phi float [ 0.000000e+00, %bb43 ], [ %tmp120, %bb88 ] |
| 87 | + %tmp140 = phi float [ 0.000000e+00, %bb43 ], [ %tmp121, %bb88 ] |
| 88 | + %tmp143 = insertelement <8 x float> zeroinitializer, float %tmp133, i64 0 |
| 89 | + %tmp144 = insertelement <8 x float> %tmp143, float %tmp134, i64 1 |
| 90 | + %tmp145 = insertelement <8 x float> %tmp144, float %tmp135, i64 2 |
| 91 | + %tmp146 = insertelement <8 x float> %tmp145, float %tmp136, i64 3 |
| 92 | + %tmp147 = insertelement <8 x float> %tmp146, float %tmp137, i64 4 |
| 93 | + %tmp148 = insertelement <8 x float> %tmp147, float %tmp138, i64 5 |
| 94 | + %tmp149 = insertelement <8 x float> %tmp148, float %tmp139, i64 6 |
| 95 | + %tmp150 = insertelement <8 x float> %tmp149, float %tmp140, i64 7 |
| 96 | + %tmp151 = bitcast <8 x float> %tmp150 to <8 x i32> |
| 97 | + call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %tmp151) |
| 98 | + ret void |
| 99 | +} |
| 100 | +; GenISA intrinsic declarations. @quux above calls only the dpas and LSC2DBlockWrite intrinsics; the two LSC2DBlockRead declarations are not called by it. |
| 101 | +declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) |
| 102 | +
| 103 | +declare <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32) |
| 104 | +
| 105 | +declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32) |
| 106 | +
| 107 | +declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
| 108 | + |
0 commit comments