@@ -5196,54 +5196,54 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5196
5196
.addReg(NewAccumulator->getOperand(0).getReg())
5197
5197
.addImm(1)
5198
5198
.setOperandDead(3); // Dead scc
5199
- if (is32BitOpc) {
5200
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5201
- .addReg(SrcReg)
5202
- .addReg(ParityRegister);
5203
- break;
5204
- } else {
5205
- Register DestSub0 =
5206
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5207
- Register DestSub1 =
5208
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5209
- Register Op1H_Op0L_Reg =
5210
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5211
- Register CarryReg =
5212
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5213
-
5214
- const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
5215
- const TargetRegisterClass *SrcSubRC =
5216
- TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
5217
-
5218
- MachineOperand Op1L = TII->buildExtractSubRegOrImm(
5219
- MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
5220
- MachineOperand Op1H = TII->buildExtractSubRegOrImm(
5221
- MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
5222
-
5223
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
5224
- .add(Op1L)
5225
- .addReg(ParityRegister);
5226
-
5227
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1H_Op0L_Reg)
5228
- .add(Op1H)
5229
- .addReg(ParityRegister);
5230
-
5231
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_HI_U32), CarryReg)
5232
- .add(Op1L)
5233
- .addReg(ParityRegister);
5234
-
5235
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestSub1)
5236
- .addReg(CarryReg)
5237
- .addReg(Op1H_Op0L_Reg)
5238
- .setOperandDead(3); // Dead scc
5239
-
5240
- BuildMI(BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
5241
- .addReg(DestSub0)
5242
- .addImm(AMDGPU::sub0)
5243
- .addReg(DestSub1)
5244
- .addImm(AMDGPU::sub1);
5245
- break;
5246
- }
5199
+ if (is32BitOpc) {
5200
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5201
+ .addReg(SrcReg)
5202
+ .addReg(ParityRegister);
5203
+ break;
5204
+ } else {
5205
+ Register DestSub0 =
5206
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5207
+ Register DestSub1 =
5208
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5209
+ Register Op1H_Op0L_Reg =
5210
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5211
+ Register CarryReg =
5212
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5213
+
5214
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
5215
+ const TargetRegisterClass *SrcSubRC =
5216
+ TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
5217
+
5218
+ MachineOperand Op1L = TII->buildExtractSubRegOrImm(
5219
+ MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
5220
+ MachineOperand Op1H = TII->buildExtractSubRegOrImm(
5221
+ MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
5222
+
5223
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
5224
+ .add(Op1L)
5225
+ .addReg(ParityRegister);
5226
+
5227
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1H_Op0L_Reg)
5228
+ .add(Op1H)
5229
+ .addReg(ParityRegister);
5230
+
5231
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_HI_U32), CarryReg)
5232
+ .add(Op1L)
5233
+ .addReg(ParityRegister);
5234
+
5235
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestSub1)
5236
+ .addReg(CarryReg)
5237
+ .addReg(Op1H_Op0L_Reg)
5238
+ .setOperandDead(3); // Dead scc
5239
+
5240
+ BuildMI(BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
5241
+ .addReg(DestSub0)
5242
+ .addImm(AMDGPU::sub0)
5243
+ .addReg(DestSub1)
5244
+ .addImm(AMDGPU::sub1);
5245
+ break;
5246
+ }
5247
5247
}
5248
5248
case AMDGPU::S_SUB_I32: {
5249
5249
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
@@ -5389,14 +5389,13 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5389
5389
IdentityValue = int32_t(0); // u|max
5390
5390
break;
5391
5391
}
5392
- BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identitylo)
5393
- .addImm(IdentityValue);
5394
- BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE),
5395
- IdentityValReg)
5396
- .addReg(Identitylo)
5397
- .addImm(AMDGPU::sub0)
5398
- .addReg(Identityhi)
5399
- .addImm(AMDGPU::sub1);
5392
+ BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identitylo)
5393
+ .addImm(IdentityValue);
5394
+ BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE), IdentityValReg)
5395
+ .addReg(Identitylo)
5396
+ .addImm(AMDGPU::sub0)
5397
+ .addReg(Identityhi)
5398
+ .addImm(AMDGPU::sub1);
5400
5399
}
5401
5400
// clang-format off
5402
5401
BuildMI(BB, I, DL, TII->get(AMDGPU::S_BRANCH))
0 commit comments