Skip to content

Commit 160fd1f

Browse files
committed
Removing redundant variables.
1 parent 2685c0e commit 160fd1f

File tree

1 file changed

+62
-72
lines changed

1 file changed

+62
-72
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 62 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -5176,31 +5176,30 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
51765176
unsigned CountReg =
51775177
IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
51785178

5179-
auto Exec =
51805179
BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
51815180

5182-
auto NewAccumulator = BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
5183-
.addReg(Exec->getOperand(0).getReg());
5181+
auto NewAccumulator =
5182+
BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
5183+
.addReg(ExecMask);
51845184

5185-
switch (Opc) {
5186-
case AMDGPU::S_XOR_B32:
5187-
case AMDGPU::S_XOR_B64: {
5188-
// Performing an XOR operation on a uniform value
5189-
// depends on the parity of the number of active lanes.
5190-
// For even parity, the result will be 0, for odd
5191-
// parity the result will be the same as the input value.
5192-
Register ParityRegister =
5193-
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5185+
switch (Opc) {
5186+
case AMDGPU::S_XOR_B32:
5187+
case AMDGPU::S_XOR_B64: {
5188+
// Performing an XOR operation on a uniform value
5189+
// depends on the parity of the number of active lanes.
5190+
// For even parity, the result will be 0, for odd
5191+
// parity the result will be the same as the input value.
5192+
Register ParityRegister =
5193+
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
51945194

5195-
auto ParityReg =
51965195
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
51975196
.addReg(NewAccumulator->getOperand(0).getReg())
51985197
.addImm(1)
51995198
.setOperandDead(3); // Dead scc
52005199
if (is32BitOpc) {
52015200
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
52025201
.addReg(SrcReg)
5203-
.addReg(ParityReg->getOperand(0).getReg());
5202+
.addReg(ParityRegister);
52045203
break;
52055204
} else {
52065205
Register DestSub0 =
@@ -5223,15 +5222,15 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52235222

52245223
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
52255224
.add(Op1L)
5226-
.addReg(ParityReg->getOperand(0).getReg());
5225+
.addReg(ParityRegister);
52275226

52285227
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1H_Op0L_Reg)
52295228
.add(Op1H)
5230-
.addReg(ParityReg->getOperand(0).getReg());
5229+
.addReg(ParityRegister);
52315230

52325231
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_HI_U32), CarryReg)
52335232
.add(Op1L)
5234-
.addReg(ParityReg->getOperand(0).getReg());
5233+
.addReg(ParityRegister);
52355234

52365235
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestSub1)
52375236
.addReg(CarryReg)
@@ -5250,12 +5249,11 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52505249
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
52515250

52525251
// Take the negation of the source operand.
5253-
auto InvertedValReg =
5254-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal)
5255-
.addImm(-1)
5256-
.addReg(SrcReg);
5252+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal)
5253+
.addImm(-1)
5254+
.addReg(SrcReg);
52575255
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
5258-
.addReg(InvertedValReg->getOperand(0).getReg())
5256+
.addReg(NegatedVal)
52595257
.addReg(NewAccumulator->getOperand(0).getReg());
52605258
break;
52615259
}
@@ -5294,14 +5292,13 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52945292
.addReg(NewAccumulator->getOperand(0).getReg())
52955293
.addImm(-1);
52965294

5297-
MachineInstr *NegatedHi =
52985295
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ASHR_I32), NegatedValHi)
52995296
.addReg(NegatedValLo)
53005297
.addImm(31)
53015298
.setOperandDead(3); // Dead scc
5302-
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1L_Op0H_Reg)
5303-
.add(Op1L)
5304-
.addReg(NegatedHi->getOperand(0).getReg());
5299+
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1L_Op0H_Reg)
5300+
.add(Op1L)
5301+
.addReg(NegatedValHi);
53055302
}
53065303
Register LowOpcode = Opc == AMDGPU::S_SUB_U64_PSEUDO
53075304
? NegatedValLo
@@ -5374,17 +5371,15 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53745371
// Create initial values of induction variable from Exec, Accumulator and
53755372
// insert branch instr to newly created ComputeBlock
53765373
uint32_t IdentityValue = getIdentityValueForWaveReduction(Opc);
5377-
auto TmpSReg = BuildMI(BB, I, DL, TII->get(MovOpcForExec), LoopIterator)
5378-
.addReg(ExecReg);
5374+
BuildMI(BB, I, DL, TII->get(MovOpcForExec), LoopIterator).addReg(ExecReg);
53795375
if (is32BitOpc) {
53805376
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), IdentityValReg)
53815377
.addImm(IdentityValue);
53825378
} else {
53835379
Register Identitylo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
53845380
Register Identityhi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5385-
MachineInstr *IdenHi =
5386-
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identityhi)
5387-
.addImm(IdentityValue);
5381+
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identityhi)
5382+
.addImm(IdentityValue);
53885383
switch (Opc) {
53895384
case AMDGPU::V_CMP_LT_U64_e64:
53905385
case AMDGPU::V_CMP_LT_I64_e64:
@@ -5395,14 +5390,14 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53955390
IdentityValue = int32_t(0); // u|max
53965391
break;
53975392
}
5398-
MachineInstr *IdenLo =
53995393
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identitylo)
54005394
.addImm(IdentityValue);
5401-
BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE), IdentityValReg)
5402-
.addReg(IdenLo->getOperand(0).getReg())
5403-
.addImm(AMDGPU::sub0)
5404-
.addReg(IdenHi->getOperand(0).getReg())
5405-
.addImm(AMDGPU::sub1);
5395+
BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE),
5396+
IdentityValReg)
5397+
.addReg(Identitylo)
5398+
.addImm(AMDGPU::sub0)
5399+
.addReg(Identityhi)
5400+
.addImm(AMDGPU::sub1);
54065401
}
54075402
// clang-format off
54085403
BuildMI(BB, I, DL, TII->get(AMDGPU::S_BRANCH))
@@ -5417,24 +5412,23 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
54175412
.addMBB(&BB);
54185413
auto ActiveBits =
54195414
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::PHI), ActiveBitsReg)
5420-
.addReg(TmpSReg->getOperand(0).getReg())
5415+
.addReg(LoopIterator)
54215416
.addMBB(&BB);
54225417

54235418
I = ComputeLoop->end();
54245419
MachineInstr *NewAccumulator;
54255420
// Perform the computations
54265421
unsigned SFFOpc = IsWave32 ? AMDGPU::S_FF1_I32_B32 : AMDGPU::S_FF1_I32_B64;
5427-
auto FF1 = BuildMI(*ComputeLoop, I, DL, TII->get(SFFOpc), FF1Reg)
5428-
.addReg(ActiveBits->getOperand(0).getReg());
5422+
BuildMI(*ComputeLoop, I, DL, TII->get(SFFOpc), FF1Reg)
5423+
.addReg(ActiveBitsReg);
54295424
if (is32BitOpc) {
5430-
MachineInstr *LaneValue =
5431-
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_READLANE_B32),
5432-
LaneValueReg)
5433-
.addReg(SrcReg)
5434-
.addReg(FF1->getOperand(0).getReg());
5425+
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_READLANE_B32),
5426+
LaneValueReg)
5427+
.addReg(SrcReg)
5428+
.addReg(FF1Reg);
54355429
NewAccumulator = BuildMI(*ComputeLoop, I, DL, TII->get(Opc), DstReg)
54365430
.addReg(Accumulator->getOperand(0).getReg())
5437-
.addReg(LaneValue->getOperand(0).getReg());
5431+
.addReg(LaneValueReg);
54385432
} else {
54395433
Register LaneValueLoReg =
54405434
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -5453,17 +5447,17 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
54535447
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_READLANE_B32),
54545448
LaneValueLoReg)
54555449
.add(Op1L)
5456-
.addReg(FF1->getOperand(0).getReg());
5450+
.addReg(FF1Reg);
54575451
MachineInstr *LaneValueHi =
54585452
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_READLANE_B32),
54595453
LaneValueHiReg)
54605454
.add(Op1H)
5461-
.addReg(FF1->getOperand(0).getReg());
5455+
.addReg(FF1Reg);
54625456
auto LaneValue = BuildMI(*ComputeLoop, I, DL,
54635457
TII->get(TargetOpcode::REG_SEQUENCE), LaneValReg)
5464-
.addReg(LaneValueLo->getOperand(0).getReg())
5458+
.addReg(LaneValueLoReg)
54655459
.addImm(AMDGPU::sub0)
5466-
.addReg(LaneValueHi->getOperand(0).getReg())
5460+
.addReg(LaneValueHiReg)
54675461
.addImm(AMDGPU::sub1);
54685462
switch (Opc) {
54695463
case ::AMDGPU::S_OR_B64:
@@ -5500,14 +5494,14 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55005494
.addImm(AMDGPU::sub0)
55015495
.add(SrcReg0Sub1)
55025496
.addImm(AMDGPU::sub1);
5503-
auto LaneMask = BuildMI(*ComputeLoop, I, DL, TII->get(Opc), LaneMaskReg)
5504-
.addReg(LaneValue->getOperand(0).getReg())
5505-
.addReg(AccumulatorVReg);
5497+
BuildMI(*ComputeLoop, I, DL, TII->get(Opc), LaneMaskReg)
5498+
.addReg(LaneValue->getOperand(0).getReg())
5499+
.addReg(AccumulatorVReg);
55065500

55075501
unsigned AndOpc = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
55085502
BuildMI(*ComputeLoop, I, DL, TII->get(AndOpc), ComparisonResultReg)
5509-
.addReg(LaneMask->getOperand(0).getReg())
5510-
.addReg(ActiveBits->getOperand(0).getReg());
5503+
.addReg(LaneMaskReg)
5504+
.addReg(ActiveBitsReg);
55115505

55125506
NewAccumulator = BuildMI(*ComputeLoop, I, DL,
55135507
TII->get(AMDGPU::S_CSELECT_B64), DstReg)
@@ -5529,19 +5523,17 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55295523
MachineOperand Accumhi = TII->buildExtractSubRegOrImm(
55305524
MI, MRI, Accumulator->getOperand(0), DstRegClass, AMDGPU::sub1,
55315525
&AMDGPU::SReg_32RegClass);
5532-
MachineInstr *DestLoComputation =
5533-
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc1), DestLo)
5534-
.add(Accumlo)
5535-
.addReg(LaneValueLo->getOperand(0).getReg());
5536-
MachineInstr *DestHiComputation =
5537-
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc2), DestHi)
5538-
.add(Accumhi)
5539-
.addReg(LaneValueHi->getOperand(0).getReg());
5526+
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc1), DestLo)
5527+
.add(Accumlo)
5528+
.addReg(LaneValueLo->getOperand(0).getReg());
5529+
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc2), DestHi)
5530+
.add(Accumhi)
5531+
.addReg(LaneValueHi->getOperand(0).getReg());
55405532
NewAccumulator = BuildMI(*ComputeLoop, I, DL,
55415533
TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
5542-
.addReg(DestLoComputation->getOperand(0).getReg())
5534+
.addReg(DestLo)
55435535
.addImm(AMDGPU::sub0)
5544-
.addReg(DestHiComputation->getOperand(0).getReg())
5536+
.addReg(DestHi)
55455537
.addImm(AMDGPU::sub1);
55465538
break;
55475539
}
@@ -5550,21 +5542,19 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55505542
// Manipulate the iterator to get the next active lane
55515543
unsigned BITSETOpc =
55525544
IsWave32 ? AMDGPU::S_BITSET0_B32 : AMDGPU::S_BITSET0_B64;
5553-
auto NewActiveBits =
5554-
BuildMI(*ComputeLoop, I, DL, TII->get(BITSETOpc), NewActiveBitsReg)
5555-
.addReg(FF1->getOperand(0).getReg())
5556-
.addReg(ActiveBits->getOperand(0).getReg());
5545+
BuildMI(*ComputeLoop, I, DL, TII->get(BITSETOpc), NewActiveBitsReg)
5546+
.addReg(FF1Reg)
5547+
.addReg(ActiveBitsReg);
55575548

55585549
// Add phi nodes
55595550
Accumulator.addReg(NewAccumulator->getOperand(0).getReg())
55605551
.addMBB(ComputeLoop);
5561-
ActiveBits.addReg(NewActiveBits->getOperand(0).getReg())
5562-
.addMBB(ComputeLoop);
5552+
ActiveBits.addReg(NewActiveBitsReg).addMBB(ComputeLoop);
55635553

55645554
// Creating branching
55655555
unsigned CMPOpc = IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64;
55665556
BuildMI(*ComputeLoop, I, DL, TII->get(CMPOpc))
5567-
.addReg(NewActiveBits->getOperand(0).getReg())
5557+
.addReg(NewActiveBitsReg)
55685558
.addImm(0);
55695559
BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
55705560
.addMBB(ComputeLoop);

0 commit comments

Comments
 (0)