@@ -5166,39 +5166,39 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5166
5166
const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
5167
5167
const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
5168
5168
Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
5169
- Register ActiveLanes =
5169
+ Register NumActiveLanes =
5170
5170
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5171
5171
5172
5172
bool IsWave32 = ST.isWave32();
5173
5173
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5174
5174
MCRegister ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
5175
- unsigned CountReg =
5175
+ unsigned BitCountOpc =
5176
5176
IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
5177
5177
5178
- BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
5179
-
5180
- auto NewAccumulator =
5181
- BuildMI(BB, MI, DL, TII->get(CountReg ), ActiveLanes )
5182
- .addReg(ExecMask);
5183
-
5184
- switch (Opc) {
5185
- case AMDGPU::S_XOR_B32: {
5186
- // Performing an XOR operation on a uniform value
5187
- // depends on the parity of the number of active lanes.
5188
- // For even parity, the result will be 0, for odd
5189
- // parity the result will be the same as the input value.
5190
- Register ParityRegister =
5191
- MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5192
-
5193
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
5194
- .addReg(NewAccumulator->getOperand(0).getReg())
5195
- .addImm(1)
5196
- .setOperandDead(3); // Dead scc
5197
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5198
- .addReg(SrcReg)
5199
- .addReg(ParityRegister);
5200
- break;
5201
- }
5178
+ BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
5179
+
5180
+ auto NewAccumulator =
5181
+ BuildMI(BB, MI, DL, TII->get(BitCountOpc ), NumActiveLanes )
5182
+ .addReg(ExecMask);
5183
+
5184
+ switch (Opc) {
5185
+ case AMDGPU::S_XOR_B32: {
5186
+ // Performing an XOR operation on a uniform value
5187
+ // depends on the parity of the number of active lanes.
5188
+ // For even parity, the result will be 0, for odd
5189
+ // parity the result will be the same as the input value.
5190
+ Register ParityRegister =
5191
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5192
+
5193
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
5194
+ .addReg(NewAccumulator->getOperand(0).getReg())
5195
+ .addImm(1)
5196
+ .setOperandDead(3); // Dead scc
5197
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5198
+ .addReg(SrcReg)
5199
+ .addReg(ParityRegister);
5200
+ break;
5201
+ }
5202
5202
case AMDGPU::S_SUB_I32: {
5203
5203
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
5204
5204
@@ -5450,8 +5450,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
5450
5450
.addReg(Accumulator->getOperand(0).getReg());
5451
5451
break;
5452
5452
}
5453
- case :: AMDGPU::S_ADD_U64_PSEUDO:
5454
- case :: AMDGPU::S_SUB_U64_PSEUDO: {
5453
+ case AMDGPU::S_ADD_U64_PSEUDO:
5454
+ case AMDGPU::S_SUB_U64_PSEUDO: {
5455
5455
unsigned newOpc1 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADD_U32
5456
5456
: AMDGPU::S_SUB_U32;
5457
5457
unsigned newOpc2 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADDC_U32
0 commit comments