Skip to content

Commit 8502f11

Browse files
committed
[NVPTX] don't erase CopyToRegs when folding movs into loads
We may still need to keep the CopyToReg even after folding its uses into vector loads, since the original register may be used in other blocks. Partially reverts 1fdbe69.
1 parent 48cd22c commit 8502f11

File tree

7 files changed

+427
-88
lines changed

7 files changed

+427
-88
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5060,11 +5060,10 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
50605060
return !U.getUser()->use_empty();
50615061
}
50625062

5063-
// Handle CopyToReg nodes that will become dead after our replacement
5064-
if (U.getUser()->getOpcode() == ISD::CopyToReg) {
5065-
DeadCopyToRegs.push_back(U.getUser());
5063+
// Peek through CopyToReg nodes. TODO: SelectionDAG needs to be improved
5064+
// to eliminate these nodes when they're unused in -O0. See PR126337.
5065+
if (U.getUser()->getOpcode() == ISD::CopyToReg)
50665066
return true;
5067-
}
50685067

50695068
// Otherwise, this use prevents us from splitting a value.
50705069
return false;
@@ -5132,10 +5131,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
51325131
for (unsigned I : seq(NewLoad->getNumValues() - NewNumOutputs))
51335132
Results.push_back(NewLoad.getValue(NewNumOutputs + I));
51345133

5135-
// Remove dead CopyToReg nodes by folding them into the chain they reference
5136-
for (SDNode *CTR : DeadCopyToRegs)
5137-
DCI.CombineTo(CTR, CTR->getOperand(0));
5138-
51395134
return DCI.DAG.getMergeValues(Results, DL);
51405135
}
51415136

@@ -6544,4 +6539,4 @@ void NVPTXTargetLowering::computeKnownBitsForTargetNode(
65446539
default:
65456540
break;
65466541
}
6547-
}
6542+
}

llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 139 additions & 38 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 146 additions & 29 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/NVPTX/i16x2-instructions.ll

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ define i16 @test_extract_0(<2 x i16> %a) #0 {
3939
; COMMON-EMPTY:
4040
; COMMON-NEXT: // %bb.0:
4141
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0];
42+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
4243
; COMMON-NEXT: cvt.u32.u16 %r2, %rs1;
4344
; COMMON-NEXT: st.param.b32 [func_retval0], %r2;
4445
; COMMON-NEXT: ret;
@@ -54,6 +55,7 @@ define i16 @test_extract_1(<2 x i16> %a) #0 {
5455
; COMMON-EMPTY:
5556
; COMMON-NEXT: // %bb.0:
5657
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0];
58+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
5759
; COMMON-NEXT: cvt.u32.u16 %r2, %rs2;
5860
; COMMON-NEXT: st.param.b32 [func_retval0], %r2;
5961
; COMMON-NEXT: ret;
@@ -70,8 +72,9 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
7072
; COMMON-NEXT: .reg .b64 %rd<2>;
7173
; COMMON-EMPTY:
7274
; COMMON-NEXT: // %bb.0:
73-
; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
7475
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0];
76+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
77+
; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
7578
; COMMON-NEXT: setp.eq.b64 %p1, %rd1, 0;
7679
; COMMON-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
7780
; COMMON-NEXT: cvt.u32.u16 %r2, %rs3;
@@ -100,7 +103,9 @@ define <2 x i16> @test_add(<2 x i16> %a, <2 x i16> %b) #0 {
100103
; NO-I16x2-EMPTY:
101104
; NO-I16x2-NEXT: // %bb.0:
102105
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_param_0];
106+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
103107
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_add_param_1];
108+
; NO-I16x2-NEXT: mov.b32 %r2, {%rs3, %rs4};
104109
; NO-I16x2-NEXT: add.s16 %rs5, %rs2, %rs4;
105110
; NO-I16x2-NEXT: add.s16 %rs6, %rs1, %rs3;
106111
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -129,6 +134,7 @@ define <2 x i16> @test_add_imm_0(<2 x i16> %a) #0 {
129134
; NO-I16x2-EMPTY:
130135
; NO-I16x2-NEXT: // %bb.0:
131136
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_0_param_0];
137+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
132138
; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2;
133139
; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1;
134140
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
@@ -156,6 +162,7 @@ define <2 x i16> @test_add_imm_1(<2 x i16> %a) #0 {
156162
; NO-I16x2-EMPTY:
157163
; NO-I16x2-NEXT: // %bb.0:
158164
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_add_imm_1_param_0];
165+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
159166
; NO-I16x2-NEXT: add.s16 %rs3, %rs2, 2;
160167
; NO-I16x2-NEXT: add.s16 %rs4, %rs1, 1;
161168
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
@@ -172,7 +179,9 @@ define <2 x i16> @test_sub(<2 x i16> %a, <2 x i16> %b) #0 {
172179
; COMMON-EMPTY:
173180
; COMMON-NEXT: // %bb.0:
174181
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sub_param_0];
182+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
175183
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_sub_param_1];
184+
; COMMON-NEXT: mov.b32 %r2, {%rs3, %rs4};
176185
; COMMON-NEXT: sub.s16 %rs5, %rs2, %rs4;
177186
; COMMON-NEXT: sub.s16 %rs6, %rs1, %rs3;
178187
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -200,7 +209,9 @@ define <2 x i16> @test_smax(<2 x i16> %a, <2 x i16> %b) #0 {
200209
; NO-I16x2-EMPTY:
201210
; NO-I16x2-NEXT: // %bb.0:
202211
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smax_param_0];
212+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
203213
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smax_param_1];
214+
; NO-I16x2-NEXT: mov.b32 %r2, {%rs3, %rs4};
204215
; NO-I16x2-NEXT: max.s16 %rs5, %rs2, %rs4;
205216
; NO-I16x2-NEXT: max.s16 %rs6, %rs1, %rs3;
206217
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -229,7 +240,9 @@ define <2 x i16> @test_umax(<2 x i16> %a, <2 x i16> %b) #0 {
229240
; NO-I16x2-EMPTY:
230241
; NO-I16x2-NEXT: // %bb.0:
231242
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umax_param_0];
243+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
232244
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umax_param_1];
245+
; NO-I16x2-NEXT: mov.b32 %r2, {%rs3, %rs4};
233246
; NO-I16x2-NEXT: max.u16 %rs5, %rs2, %rs4;
234247
; NO-I16x2-NEXT: max.u16 %rs6, %rs1, %rs3;
235248
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -258,7 +271,9 @@ define <2 x i16> @test_smin(<2 x i16> %a, <2 x i16> %b) #0 {
258271
; NO-I16x2-EMPTY:
259272
; NO-I16x2-NEXT: // %bb.0:
260273
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_smin_param_0];
274+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
261275
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_smin_param_1];
276+
; NO-I16x2-NEXT: mov.b32 %r2, {%rs3, %rs4};
262277
; NO-I16x2-NEXT: min.s16 %rs5, %rs2, %rs4;
263278
; NO-I16x2-NEXT: min.s16 %rs6, %rs1, %rs3;
264279
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -287,7 +302,9 @@ define <2 x i16> @test_umin(<2 x i16> %a, <2 x i16> %b) #0 {
287302
; NO-I16x2-EMPTY:
288303
; NO-I16x2-NEXT: // %bb.0:
289304
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_umin_param_0];
305+
; NO-I16x2-NEXT: mov.b32 %r1, {%rs1, %rs2};
290306
; NO-I16x2-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_umin_param_1];
307+
; NO-I16x2-NEXT: mov.b32 %r2, {%rs3, %rs4};
291308
; NO-I16x2-NEXT: min.u16 %rs5, %rs2, %rs4;
292309
; NO-I16x2-NEXT: min.u16 %rs6, %rs1, %rs3;
293310
; NO-I16x2-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -305,7 +322,9 @@ define <2 x i16> @test_mul(<2 x i16> %a, <2 x i16> %b) #0 {
305322
; COMMON-EMPTY:
306323
; COMMON-NEXT: // %bb.0:
307324
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_mul_param_0];
325+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
308326
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_mul_param_1];
327+
; COMMON-NEXT: mov.b32 %r2, {%rs3, %rs4};
309328
; COMMON-NEXT: mul.lo.s16 %rs5, %rs2, %rs4;
310329
; COMMON-NEXT: mul.lo.s16 %rs6, %rs1, %rs3;
311330
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -687,11 +706,15 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
687706
; COMMON-EMPTY:
688707
; COMMON-NEXT: // %bb.0:
689708
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0];
690-
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_2];
691-
; COMMON-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_3];
692-
; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs5;
693-
; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs6;
709+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
710+
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_3];
711+
; COMMON-NEXT: mov.b32 %r4, {%rs3, %rs4};
712+
; COMMON-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_2];
713+
; COMMON-NEXT: mov.b32 %r3, {%rs5, %rs6};
694714
; COMMON-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1];
715+
; COMMON-NEXT: mov.b32 %r2, {%rs7, %rs8};
716+
; COMMON-NEXT: setp.ne.b16 %p1, %rs5, %rs3;
717+
; COMMON-NEXT: setp.ne.b16 %p2, %rs6, %rs4;
695718
; COMMON-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2;
696719
; COMMON-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1;
697720
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9};
@@ -711,10 +734,12 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
711734
; COMMON-NEXT: // %bb.0:
712735
; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i32_i16_param_1];
713736
; COMMON-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_i32_i16_param_0];
714-
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i32_i16_param_2];
715-
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i32_i16_param_3];
716-
; COMMON-NEXT: setp.ne.b16 %p1, %rs1, %rs3;
717-
; COMMON-NEXT: setp.ne.b16 %p2, %rs2, %rs4;
737+
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i32_i16_param_3];
738+
; COMMON-NEXT: mov.b32 %r6, {%rs1, %rs2};
739+
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i32_i16_param_2];
740+
; COMMON-NEXT: mov.b32 %r5, {%rs3, %rs4};
741+
; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1;
742+
; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2;
718743
; COMMON-NEXT: selp.b32 %r7, %r2, %r4, %p2;
719744
; COMMON-NEXT: selp.b32 %r8, %r1, %r3, %p1;
720745
; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7};
@@ -733,12 +758,14 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
733758
; COMMON-NEXT: .reg .b32 %r<7>;
734759
; COMMON-EMPTY:
735760
; COMMON-NEXT: // %bb.0:
761+
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i16_i32_param_0];
762+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
736763
; COMMON-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_i16_i32_param_3];
737764
; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2];
738-
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_i16_i32_param_0];
765+
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i16_i32_param_1];
766+
; COMMON-NEXT: mov.b32 %r2, {%rs3, %rs4};
739767
; COMMON-NEXT: setp.ne.b32 %p1, %r3, %r5;
740768
; COMMON-NEXT: setp.ne.b32 %p2, %r4, %r6;
741-
; COMMON-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_i16_i32_param_1];
742769
; COMMON-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2;
743770
; COMMON-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1;
744771
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -852,6 +879,7 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 {
852879
; COMMON-EMPTY:
853880
; COMMON-NEXT: // %bb.0:
854881
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi32_param_0];
882+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
855883
; COMMON-NEXT: cvt.u32.u16 %r2, %rs2;
856884
; COMMON-NEXT: cvt.u32.u16 %r3, %rs1;
857885
; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2};
@@ -869,6 +897,7 @@ define <2 x i64> @test_zext_2xi64(<2 x i16> %a) #0 {
869897
; COMMON-EMPTY:
870898
; COMMON-NEXT: // %bb.0:
871899
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_zext_2xi64_param_0];
900+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
872901
; COMMON-NEXT: cvt.u64.u16 %rd1, %rs2;
873902
; COMMON-NEXT: cvt.u64.u16 %rd2, %rs1;
874903
; COMMON-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1};
@@ -927,6 +956,7 @@ define <2 x i16> @test_shufflevector(<2 x i16> %a) #0 {
927956
; COMMON-EMPTY:
928957
; COMMON-NEXT: // %bb.0:
929958
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_shufflevector_param_0];
959+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
930960
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
931961
; COMMON-NEXT: ret;
932962
%s = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
@@ -940,8 +970,9 @@ define <2 x i16> @test_insertelement(<2 x i16> %a, i16 %x) #0 {
940970
; COMMON-NEXT: .reg .b32 %r<2>;
941971
; COMMON-EMPTY:
942972
; COMMON-NEXT: // %bb.0:
943-
; COMMON-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1];
944973
; COMMON-NEXT: ld.param.v2.b16 {%rs2, %rs3}, [test_insertelement_param_0];
974+
; COMMON-NEXT: mov.b32 %r1, {%rs2, %rs3};
975+
; COMMON-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1];
945976
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
946977
; COMMON-NEXT: ret;
947978
%i = insertelement <2 x i16> %a, i16 %x, i64 1
@@ -956,6 +987,7 @@ define <2 x i16> @test_fptosi_2xhalf_to_2xi16(<2 x half> %a) #0 {
956987
; COMMON-EMPTY:
957988
; COMMON-NEXT: // %bb.0:
958989
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_2xhalf_to_2xi16_param_0];
990+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
959991
; COMMON-NEXT: cvt.rzi.s16.f16 %rs3, %rs2;
960992
; COMMON-NEXT: cvt.rzi.s16.f16 %rs4, %rs1;
961993
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
@@ -972,6 +1004,7 @@ define <2 x i16> @test_fptoui_2xhalf_to_2xi16(<2 x half> %a) #0 {
9721004
; COMMON-EMPTY:
9731005
; COMMON-NEXT: // %bb.0:
9741006
; COMMON-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xhalf_to_2xi16_param_0];
1007+
; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2};
9751008
; COMMON-NEXT: cvt.rzi.u16.f16 %rs3, %rs2;
9761009
; COMMON-NEXT: cvt.rzi.u16.f16 %rs4, %rs1;
9771010
; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1936,6 +1936,8 @@ define <4 x i8> @test_fptosi_4xhalf_to_4xi8(<4 x half> %a) #0 {
19361936
; O0-EMPTY:
19371937
; O0-NEXT: // %bb.0:
19381938
; O0-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptosi_4xhalf_to_4xi8_param_0];
1939+
; O0-NEXT: mov.b32 %r2, {%rs3, %rs4};
1940+
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
19391941
; O0-NEXT: cvt.rzi.s16.f16 %rs5, %rs4;
19401942
; O0-NEXT: cvt.rzi.s16.f16 %rs6, %rs3;
19411943
; O0-NEXT: mov.b32 %r3, {%rs6, %rs5};
@@ -1990,6 +1992,8 @@ define <4 x i8> @test_fptoui_4xhalf_to_4xi8(<4 x half> %a) #0 {
19901992
; O0-EMPTY:
19911993
; O0-NEXT: // %bb.0:
19921994
; O0-NEXT: ld.param.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [test_fptoui_4xhalf_to_4xi8_param_0];
1995+
; O0-NEXT: mov.b32 %r2, {%rs3, %rs4};
1996+
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
19931997
; O0-NEXT: cvt.rzi.u16.f16 %rs5, %rs4;
19941998
; O0-NEXT: cvt.rzi.u16.f16 %rs6, %rs3;
19951999
; O0-NEXT: mov.b32 %r3, {%rs6, %rs5};

0 commit comments

Comments
 (0)