@@ -5136,18 +5136,23 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
   if (isSGPR) {
     switch (Opc) {
     case AMDGPU::S_MIN_U32:
-    case AMDGPU::V_CMP_LT_U64_e64: /*umin*/
     case AMDGPU::S_MIN_I32:
-    case AMDGPU::V_CMP_LT_I64_e64: /*min*/
     case AMDGPU::S_MAX_U32:
-    case AMDGPU::V_CMP_GT_U64_e64: /*umax*/
     case AMDGPU::S_MAX_I32:
-    case AMDGPU::V_CMP_GT_I64_e64: /*max*/
     case AMDGPU::S_AND_B32:
     case AMDGPU::S_OR_B32: {
       // Idempotent operations.
-      unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
-      BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
+      BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
+      RetBB = &BB;
+      break;
+    }
+    case AMDGPU::V_CMP_LT_U64_e64: // umin
+    case AMDGPU::V_CMP_LT_I64_e64: // min
+    case AMDGPU::V_CMP_GT_U64_e64: // umax
+    case AMDGPU::V_CMP_GT_I64_e64: // max
+    {
+      // Idempotent operations.
+      BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
       RetBB = &BB;
       break;
     }
@@ -5341,9 +5346,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
       Register LaneMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
       Register ComparisonResultReg =
           MRI.createVirtualRegister(WaveMaskRegClass);
-      const TargetRegisterClass *VregClass =
-          ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
-                                 : &AMDGPU::VReg_64RegClass;
+      const TargetRegisterClass *VregClass = TRI->getVGPR64Class();
       const TargetRegisterClass *VSubRegClass =
           TRI->getSubRegisterClass(VregClass, AMDGPU::sub0);
       Register AccumulatorVReg = MRI.createVirtualRegister(VregClass);