@@ -5244,13 +5244,13 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5244
5244
.addImm(AMDGPU::sub1);
5245
5245
break;
5246
5246
}
5247
- }
5247
+ }
5248
5248
case AMDGPU::S_SUB_I32: {
5249
5249
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
5250
5250
5251
5251
// Take the negation of the source operand.
5252
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32 ), NegatedVal)
5253
- .addImm(-1 )
5252
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_SUB_I32 ), NegatedVal)
5253
+ .addImm(0 )
5254
5254
.addReg(SrcReg);
5255
5255
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5256
5256
.addReg(NegatedVal)
@@ -5288,17 +5288,16 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5288
5288
MI, MRI, MI.getOperand(1), Src1RC, AMDGPU::sub1, Src1SubRC);
5289
5289
5290
5290
if (Opc == AMDGPU::S_SUB_U64_PSEUDO) {
5291
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedValLo)
5292
- .addReg(NewAccumulator->getOperand(0).getReg())
5293
- .addImm(-1);
5294
-
5295
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ASHR_I32), NegatedValHi)
5296
- .addReg(NegatedValLo)
5297
- .addImm(31)
5298
- .setOperandDead(3); // Dead scc
5299
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1L_Op0H_Reg)
5300
- .add(Op1L)
5301
- .addReg(NegatedValHi);
5291
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_SUB_I32), NegatedValLo)
5292
+ .addImm(0)
5293
+ .addReg(NewAccumulator->getOperand(0).getReg());
5294
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ASHR_I32), NegatedValHi)
5295
+ .addReg(NegatedValLo)
5296
+ .addImm(31)
5297
+ .setOperandDead(3); // Dead scc
5298
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1L_Op0H_Reg)
5299
+ .add(Op1L)
5300
+ .addReg(NegatedValHi);
5302
5301
}
5303
5302
Register LowOpcode = Opc == AMDGPU::S_SUB_U64_PSEUDO
5304
5303
? NegatedValLo
0 commit comments