diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index e63e77a8302c0..7a4bc392bfc47 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -454,6 +454,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, .setMMRAMetadata(MIMD.getMMRAMetadata()); } +/// This version of the builder inserts the newly-built instruction after the +/// given position in the given MachineBasicBlock, and does NOT take a +/// destination register. +inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const MIMetadata &MIMD, + const MCInstrDesc &MCID) { + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); + BB.insertAfter(I, MI); + return MachineInstrBuilder(MF, MI) + .setPCSections(MIMD.getPCSections()) + .setMMRAMetadata(MIMD.getMMRAMetadata()); +} + inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const MIMetadata &MIMD, diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index ea33a229110c1..91691ea96942d 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -90,6 +90,36 @@ struct GCNRegPressure { DynamicVGPRBlockSize)); } + unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) { + if (!ST.hasGFX90AInsts()) + return 0; + + auto MaxVectorRegs = ST.getMaxNumVectorRegs(MF.getFunction()); + unsigned ArchVGPRThreshold = MaxVectorRegs.first; + unsigned AGPRThreshold = MaxVectorRegs.second; + + unsigned ArchPressure = getArchVGPRNum(); + unsigned AGPRPressure = getAGPRNum(); + + unsigned ArchSpill = ArchPressure > ArchVGPRThreshold + ? (ArchPressure - ArchVGPRThreshold) + : 0; + unsigned AGPRSpill = + AGPRPressure > AGPRThreshold ? 
(AGPRPressure - AGPRThreshold) : 0; + + unsigned UnifiedSpill = 0; + + if (ST.hasGFX90AInsts()) { + unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); + unsigned UnifiedPressure = getVGPRNum(true); + UnifiedSpill = UnifiedPressure > CombinedThreshold + ? (UnifiedPressure - CombinedThreshold) + : 0; + } + + return std::max(UnifiedSpill, (ArchSpill + AGPRSpill)); + } + void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index ce1ce687d0038..564021740b90c 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -29,6 +29,7 @@ #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" @@ -528,6 +529,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); + SchedStages.push_back(GCNSchedStageID::RewriteSchedule); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); @@ -778,6 +780,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) { switch (SchedStageID) { case GCNSchedStageID::OccInitialSchedule: return std::make_unique(SchedStageID, *this); + case GCNSchedStageID::RewriteSchedule: + return std::make_unique(SchedStageID, *this); case GCNSchedStageID::UnclusteredHighRPReschedule: return std::make_unique(SchedStageID, *this); case GCNSchedStageID::ClusteredLowOccupancyReschedule: @@ -898,13 +902,11 @@ GCNScheduleDAGMILive::getRegionLiveInMap() const { 
RegionFirstMIs.reserve(Regions.size()); auto I = Regions.rbegin(), E = Regions.rend(); do { - const MachineBasicBlock *MBB = I->first->getParent(); auto *MI = &*skipDebugInstructionsForward(I->first, I->second); RegionFirstMIs.push_back(MI); - do { - ++I; - } while (I != E && I->first->getParent() == MBB); + ++I; } while (I != E); + return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS); } @@ -1003,6 +1005,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) { case GCNSchedStageID::OccInitialSchedule: OS << "Max Occupancy Initial Schedule"; break; + case GCNSchedStageID::RewriteSchedule: + OS << "Instruction Rewriting Reschedule"; + break; case GCNSchedStageID::UnclusteredHighRPReschedule: OS << "Unclustered High Register Pressure Reschedule"; break; @@ -1036,6 +1041,112 @@ bool GCNSchedStage::initGCNSchedStage() { return true; } +SlotIndex +RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO, + LiveIntervals *LIS, + SmallVectorImpl &DefIdxs) { + assert(UseMO.isReg()); + MachineInstr *UseMI = UseMO.getParent(); + LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); + auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); + + SlotIndex DefMBBStart = + LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def)); + + // If the def is in the block, then it must be the only reaching def. 
+ if (DefMBBStart != VNInfo->def) { + DefIdxs.push_back(VNInfo->def); + return VNInfo->def; + } + + SmallPtrSet Visited; + SmallVector Worklist; + + Visited.insert(UseMI->getParent()); + + // Mark the predecessor blocks for traversal + for (auto PredMBB : UseMI->getParent()->predecessors()) { + Worklist.push_back(PredMBB); + Visited.insert(PredMBB); + } + + while (!Worklist.empty()) { + MachineBasicBlock *CurrMBB = Worklist.pop_back_val(); + + SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB); + auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); + + MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def); + SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB); + + // If there is a def in this block, then add it to the list. This is the + // reaching def of this path. + if (DefMBBStart != VNInfo->def) { + DefIdxs.push_back(VNInfo->def); + continue; + } + + for (auto PredMBB : DefMBB->predecessors()) { + if (Visited.insert(PredMBB).second) + Worklist.push_back(PredMBB); + } + } + + return VNInfo->def; +} + +void RewriteScheduleStage::findReachingUses( + MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses) { + SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI); + for (auto &UseMO : + DAG.MRI.use_nodbg_operands(DefMI->getOperand(0).getReg())) { + SmallVector ReachingDefIndexes; + findReachingDefs(UseMO, LIS, ReachingDefIndexes); + + // If we find a use that contains this DefMI in its reachingDefs, then it is + // a reaching use. 
+ if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) { + return SlotIndex::isSameInstr(RDIdx, DefIdx); + }) != ReachingDefIndexes.end()) + ReachingUses.push_back(&UseMO); + } +} + +bool RewriteScheduleStage::initGCNSchedStage() { + const GCNSubtarget &ST = MF.getSubtarget(); + + RegionsWithExcessArchVGPR.resize(DAG.Regions.size()); + RegionsWithExcessArchVGPR.reset(); + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + auto PressureBefore = DAG.Pressure[Region]; + if (PressureBefore.getArchVGPRNum() > ST.getAddressableNumArchVGPRs()) + RegionsWithExcessArchVGPR[Region] = true; + } + + if (!ST.hasGFX90AInsts() || RegionsWithExcessArchVGPR.none()) + return false; + + TII = ST.getInstrInfo(); + SRI = ST.getRegisterInfo(); + + std::vector> RewriteCands; + DenseMap> CopyForUse; + SmallPtrSet CopyForDef; + + if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef)) + return false; + + int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef); + + // If we haven't found the beneficial conditions, prefer the VGPR form which + // may result in less cross RC copies. 
+ if (Cost > 0) + return false; + + return rewrite(RewriteCands); +} + bool UnclusteredHighRPStage::initGCNSchedStage() { if (DisableUnclusterHighRP) return false; @@ -1642,6 +1753,534 @@ void GCNSchedStage::revertScheduling() { DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); } +bool RewriteScheduleStage::isRewriteCandidate(MachineInstr *MI) const { + + if (!static_cast(DAG.TII)->isMAI(*MI)) + return false; + return AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1; +} + +bool RewriteScheduleStage::initHeuristics( + std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef) { + // Prepare for the heuristics + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (isRewriteCandidate(&MI)) { + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + if (ReplacementOp == -1) + continue; + + RewriteCands.push_back({&MI, MI.getOpcode()}); + MI.setDesc(TII->get(ReplacementOp)); + + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2->isReg()) { + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + + // For any definition of the src2 register which is non-MFMA, we + // insert a copy. + for (SlotIndex RDIdx : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); + if (!TII->isMAI(*RD)) + CopyForDef.insert(RD); + } + } + + MachineOperand &Dst = MI.getOperand(0); + SmallVector DstReachingUses; + + findReachingUses(&MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // For any user of the result of the MFMA which is not an MFMA, we + // insert a copy. For a given register, we will only insert one copy + // per user block. 
+ CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg()); + + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (auto RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // For any definition of the user of the MFMA which is not an MFMA, + // we insert a copy. We do this to transform all the reaching defs + // of this use to AGPR. By doing this, we can insert a copy from + // AGPR to VGPR at the user rather than after the MFMA. + CopyForDef.insert(RD); + } + } + + // Do the rewrite to allow for updated RP calculation. + const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg()); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC); + DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); + if (Src2->isReg()) + DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); + } + } + } + + return true; +} + +int64_t RewriteScheduleStage::getRewriteCost( + std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef) { + MBFI.calculate(MF, MBPI, *DAG.MLI); + int64_t BestSpillCost = 0; + int64_t Cost = 0; + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + if (!RegionsWithExcessArchVGPR[Region]) + continue; + + auto PressureBefore = DAG.Pressure[Region]; + unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF); + + // For the cases we care about (i.e. ArchVGPR usage is greater than the + // addressable limit), rewriting alone should bring pressure to manageable + // level. If we find any such region, then the rewrite is potentially + // beneficial. 
+ auto PressureAfter = DAG.getRealRegPressure(Region); + unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF); + + uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + uint64_t BlockFreq = + MBFI.getBlockFreq(DAG.Regions[Region].first->getParent()) + .getFrequency(); + + bool RelativeFreqIsDenom = EntryFreq > BlockFreq; + uint64_t RelativeFreq = EntryFreq && BlockFreq + ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq + : BlockFreq / EntryFreq) + : 1; + + // This assumes perfect spilling / splitting -- using one spill / copy + // instruction and one restoreFrom / copy for each excess register, + int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2; + + // Also account for the block frequency. + if (RelativeFreqIsDenom) + SpillCost /= (int64_t)RelativeFreq; + else + SpillCost *= (int64_t)RelativeFreq; + + // If we have increased spilling in any block, just bail. + if (SpillCost > 0) + return SpillCost; + + if (SpillCost < BestSpillCost) + BestSpillCost = SpillCost; + } + + // Set the cost to the largest decrease in spill cost in order to not double + // count spill reductions. + Cost = BestSpillCost; + + assert(Cost <= 0); + + unsigned CopyCost = 0; + + uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + + // For each CopyForDef, increase the cost by the register size while + // accounting for block frequency. + for (auto *DefMI : CopyForDef) { + auto DefReg = DefMI->getOperand(0).getReg(); + uint64_t DefFreq = + EntryFreq + ? MBFI.getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq + : 1; + + unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(DefReg)); + unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); + CopyCost += NumRegs * DefFreq; + } + + // Account for CopyForUse copies in each block that the register is used. + for (auto &UseEntry : CopyForUse) { + uint64_t UseFreq = + EntryFreq ? 
MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq + : 1; + + for (auto UseReg : UseEntry.second) { + unsigned RegSize = + DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg)); + unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); + CopyCost += NumRegs * UseFreq; + } + } + + Cost += CopyCost; + + // Reset to the vgpr form. We must do rewriting after copy-insertion, as some + // defs of the register may require VGPR. + for (auto RI : RewriteCands) { + MachineInstr *MI = RI.first; + + assert(TII->isMAI(*MI)); + const TargetRegisterClass *AGPRRC = + DAG.MRI.getRegClass(MI->getOperand(0).getReg()); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC); + + MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); + assert(Src2); + + if (Src2->isReg()) { + DAG.MRI.setRegClass(Src2->getReg(), VGPRRC); + } + DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC); + MI->setDesc(TII->get(RI.second)); + } + + return Cost; +} + +bool RewriteScheduleStage::rewrite( + std::vector> &RewriteCands) { + DenseMap FirstMIToRegion; + DenseMap LastMIToRegion; + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + auto Entry = DAG.Regions[Region]; + if (Entry.first == Entry.second) + continue; + + FirstMIToRegion[&*Entry.first] = Region; + if (Entry.second != Entry.first->getParent()->end()) + LastMIToRegion[&*Entry.second] = Region; + } + + // Rewrite the MFMAs to AGPR, and insert any copies as needed. + // The general assumption of the algorithm (and the previous cost calculation) + // is that it is better to insert the copies in the MBB of the def of the src2 + // operands, and in the MBB of the user of the dest operands. This is based on + // the assumption that the MFMAs are likely to appear in loop bodies, while + // the src2 and dest operands are live-in / live-out of the loop. Due to this + // design, the algorithm for finding copy insertion points is more + // complicated. 
+  //
+  // There are three main cases to handle: 1. the reaching defs of the src2
+  // operands, 2. the reaching uses of the dst operands, and 3. the reaching
+  // defs of the reaching uses of the dst operand.
+  //
+  // In the first case, we simply insert copies after each of the reaching
+  // definitions. In the second case, we collect all the uses of a given dest
+  // and organize them by MBB. Then, we insert 1 copy for each MBB before the
+  // earliest use. Since the use may have multiple reaching defs, and since we
+  // want to replace the register it is using with the result of the copy, we
+  // must handle case 3. In the third case, we simply insert a copy after each
+  // of the reaching defs to connect to the copy of the reaching uses of the dst
+  // reg. This allows us to avoid inserting copies next to the MFMAs.
+  //
+  // While inserting the copies, we maintain a map of operands which will use
+  // different regs (i.e. the result of the copies). For example, a case 1 src2
+  // operand will use the register result of the copies after the reaching defs,
+  // as opposed to the original register. Now that we have completed our copy
+  // analysis and placement, we can bulk update the registers. We do this
+  // separately so as to avoid complicating the reachingDef and reachingUse
+  // queries.
+  //
+  // While inserting the copies, we also maintain a list of registers which we
+  // will want to reclassify as AGPR. After doing the copy insertion and the
+  // register replacement, we can finally do the reclassification. This uses the
+  // redef map, as the registers we are interested in reclassifying may be
+  // replaced by the result of a copy. We must do this after the copy analysis
+  // and placement as we must have an accurate redef map -- otherwise we may end
+  // up creating illegal instructions.
+ + // The original registers of the MFMA that need to be reclassified as AGPR + std::set RewriteRegs; + // The map of an original register in the MFMA to a new register (result of a + // copy) that it should be replaced with. + DenseMap RedefMap; + // The map of the original MFMA registers to the relevant MFMA operands. + DenseMap> ReplaceMap; + // The map of reaching defs for a given register -- to avoid duplicate copies. + DenseMap> ReachingDefCopyMap; + // The map of reaching uses for a given register by basic block -- to avoid + // duplicate copies and to calculate per MBB insert pts. + DenseMap>> + ReachingUseTracker; + + for (auto &RI : RewriteCands) { + MachineInstr &MI = *RI.first; + + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + if (ReplacementOp == -1) + continue; + MI.setDesc(TII->get(ReplacementOp)); + + // Case 1: insert copies for the reaching defs of the Src2Reg. + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + + if (Src2->isReg()) { + Register Src2Reg = Src2->getReg(); + if (!Src2Reg.isVirtual()) + return false; + + Register MappedReg = Src2->getReg(); + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + SmallVector Src2DefsReplace; + + for (auto RDIndex : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def, then we need a copy. + if (find(Src2DefsReplace, RD) == Src2DefsReplace.end()) + Src2DefsReplace.push_back(RD); + } + + if (!Src2DefsReplace.empty()) { + if (RedefMap.contains(Src2Reg)) + MappedReg = RedefMap[Src2Reg]; + else { + assert(!ReachingDefCopyMap.contains(Src2Reg)); + const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg); + const TargetRegisterClass *VGPRRC = + SRI->getEquivalentVGPRClass(Src2RC); + + // Track the mapping of the original register to the new register. 
+ MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[Src2Reg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : Src2DefsReplace) { + // Do not create redundant copies. + if (ReachingDefCopyMap[Src2Reg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMIAfter(*RD->getParent(), RD->getIterator(), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(Src2Reg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + if (LastMIToRegion.contains(RD)) { + unsigned UpdateRegion = LastMIToRegion[RD]; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + // Track the register for reclassification + RewriteRegs.insert(Src2Reg); + + // Always insert the operand for replacement. If this corresponds with a + // chain of tied-def we may not see the VGPR requirement until later. + ReplaceMap[Src2Reg].insert(Src2); + } + + // Case 2 and Case 3: insert copies before the reaching uses of the dsts, + // and after the reaching defs of the reaching uses of the dsts. + + MachineOperand *Dst = &MI.getOperand(0); + Register DstReg = Dst->getReg(); + if (!DstReg.isVirtual()) + return false; + + Register MappedReg = DstReg; + SmallVector DstReachingUses; + + SmallVector DstReachingUseCopies; + SmallVector DstUseDefsReplace; + + findReachingUses(&MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // If there is a non mai reaching use, then we need a copy. 
+ if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end()) + DstReachingUseCopies.push_back(RUOp); + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (auto RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def of this reaching use, then we will + // need a copy. + if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end()) + DstUseDefsReplace.push_back(RD); + } + } + + if (!DstUseDefsReplace.empty()) { + if (RedefMap.contains(DstReg)) + MappedReg = RedefMap[DstReg]; + else { + assert(!ReachingDefCopyMap.contains(DstReg)); + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + + // Track the mapping of the original register to the new register. + MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[DstReg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : DstUseDefsReplace) { + // Do not create reundant copies. + if (ReachingDefCopyMap[DstReg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMIAfter(*RD->getParent(), RD->getIterator(), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + if (LastMIToRegion.contains(RD)) { + unsigned UpdateRegion = LastMIToRegion[RD]; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + for (MachineOperand *RU : DstReachingUseCopies) { + MachineBasicBlock *RUBlock = RU->getParent()->getParent(); + // Just keep track of the reaching use of this register by block. 
After we + // have scanned all the MFMAs we can find optimal insert pts. + if (RUBlock != MI.getParent()) { + ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU); + continue; + } + + // Special case, the use is in the same block as the MFMA. Insert the copy + // just before the use. + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC); + MachineInstr *UseInst = RU->getParent(); + MachineInstrBuilder VGPRCopy = + BuildMI(*UseInst->getParent(), UseInst->getIterator(), + UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(NewUseReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + // Since we know this use has only one reaching def, we can replace the + // use reg. + RU->setReg(NewUseReg); + // Track the copy source operand for replacement. + ReplaceMap[DstReg].insert(&VGPRCopy->getOperand(1)); + } + + // Track the register for reclassification + RewriteRegs.insert(DstReg); + // Insert the dst operand for replacement. If this dst is in a chain of + // tied-def MFMAs, and the first src2 needs to be replaced with a new reg, + // all the correspond operands need to be replaced. + ReplaceMap[DstReg].insert(Dst); + } + + // Handle the copies for dst uses. + for (auto RUBlockEntry : ReachingUseTracker) { + for (auto RUDst : RUBlockEntry.second) { + MachineOperand *OpBegin = *RUDst.second.begin(); + SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent()); + + // Find the earliest use in this block. 
+      for (auto User : RUDst.second) {
+        SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent());
+        if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))
+          InstPt = NewInstPt;
+      }
+
+      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first);
+      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
+      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
+      MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt);
+
+      MachineInstrBuilder VGPRCopy =
+          BuildMI(*UseInst->getParent(), UseInst->getIterator(),
+                  UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY))
+              .addDef(NewUseReg, 0, 0)
+              .addUse(RUDst.first, 0, 0);
+      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
+
+      // If this UseInst was the first MI in the region, update the region
+      // boundaries.
+      if (FirstMIToRegion.contains(UseInst)) {
+        unsigned UpdateRegion = FirstMIToRegion[UseInst];
+        DAG.Regions[UpdateRegion].first = VGPRCopy;
+        FirstMIToRegion.erase(UseInst);
+      }
+
+      // Replace the operand for all users.
+      for (auto User : RUDst.second) {
+        User->setReg(NewUseReg);
+      }
+
+      // Track the copy source operand for replacement.
+      ReplaceMap[RUDst.first].insert(&VGPRCopy->getOperand(1));
+    }
+  }
+
+  // We may have needed to insert copies after the reaching defs of the MFMAs.
+  // Replace the original register with the result of the copy for all relevant
+  // operands.
+  for (auto NewDef : RedefMap) {
+    Register OldReg = NewDef.first;
+    Register NewReg = NewDef.second;
+
+    // Replace the register for any associated operand in the MFMA chain.
+    for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) {
+      ReplaceOp->setReg(NewReg);
+    }
+  }
+
+  // Finally, do the reclassification of the MFMA registers.
+  for (auto RewriteReg : RewriteRegs) {
+    Register RegToRewrite = RewriteReg;
+
+    // Be sure to update the replacement register and not the original.
+ if (RedefMap.contains(RewriteReg)) + RegToRewrite = RedefMap[RewriteReg]; + + const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC); + + DAG.MRI.setRegClass(RegToRewrite, AGPRRC); + } + + // Bulk update the LIS. + DAG.LIS->reanalyze(DAG.MF); + // Liveins may have been modified for cross RC copies + RegionPressureMap LiveInUpdater(&DAG, false); + LiveInUpdater.buildLiveRegMap(); + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) + DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region); + + return true; +} + bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat, SlotIndex OriginalIdx, SlotIndex RematIdx) const { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 94cd795bbc8f6..f5b8c6b0f16d4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -16,6 +16,9 @@ #include "GCNRegPressure.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -28,11 +31,12 @@ class GCNSchedStage; enum class GCNSchedStageID : unsigned { OccInitialSchedule = 0, - UnclusteredHighRPReschedule = 1, - ClusteredLowOccupancyReschedule = 2, - PreRARematerialize = 3, - ILPInitialSchedule = 4, - MemoryClauseInitialSchedule = 5 + RewriteSchedule = 1, + UnclusteredHighRPReschedule = 2, + ClusteredLowOccupancyReschedule = 3, + PreRARematerialize = 4, + ILPInitialSchedule = 5, + MemoryClauseInitialSchedule = 6 }; #ifndef NDEBUG @@ -224,6 +228,7 @@ using RegionBoundaries = class GCNScheduleDAGMILive final : public ScheduleDAGMILive { friend class GCNSchedStage; friend class OccInitialScheduleStage; + friend class 
RewriteScheduleStage;
   friend class UnclusteredHighRPStage;
   friend class ClusteredLowOccStage;
   friend class PreRARematStage;
@@ -401,6 +406,61 @@ class OccInitialScheduleStage : public GCNSchedStage {
     : GCNSchedStage(StageID, DAG) {}
 };
 
+class RewriteScheduleStage : public GCNSchedStage {
+private:
+  // Record regions with excess archvgpr register pressure over the physical
+  // register limit. Register pressure in these regions usually will result in
+  // spilling.
+  BitVector RegionsWithExcessArchVGPR;
+
+  MachineBranchProbabilityInfo MBPI;
+  MachineBlockFrequencyInfo MBFI;
+
+  const SIInstrInfo *TII;
+  const SIRegisterInfo *SRI;
+
+  /// Do a speculative rewrite and collect copy locations. The speculative
+  /// rewrite allows us to calculate the RP of the code after the rewrite, and
+  /// the copy locations allow us to calculate the total cost of copies required
+  /// for the rewrite. Stores the rewritten instructions in \p RewriteCands,
+  /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
+  /// copy locations for defs (of the MFMA operands) in \p CopyForDef.
+  bool
+  initHeuristics(std::vector> &RewriteCands,
+                 DenseMap> &CopyForUse,
+                 SmallPtrSetImpl &CopyForDef);
+
+  /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
+  /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
+  /// costs, and \p RewriteCands to undo rewriting.
+  int64_t
+  getRewriteCost(std::vector> &RewriteCands,
+                 DenseMap> &CopyForUse,
+                 SmallPtrSetImpl &CopyForDef);
+
+  /// Do the final rewrite on \p RewriteCands and insert any needed copies.
+  bool rewrite(std::vector> &RewriteCands);
+
+  /// \returns true if this MI is a rewrite candidate.
+ bool isRewriteCandidate(MachineInstr *MI) const; + + /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p + /// DefIdx + SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs); + + /// Finds all the reaching uses of \p DefMI and stores the use operands in \p + /// ReachingUses + void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses); + +public: + bool initGCNSchedStage() override; + + RewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) + : GCNSchedStage(StageID, DAG) {} +}; + class UnclusteredHighRPStage : public GCNSchedStage { private: // Save the initial occupancy before starting this stage. diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir new file mode 100644 index 0000000000000..73eeafb6bccc5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -0,0 +1,5591 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void 
@src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void 
@src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @no_copy_for_mfma() #0 { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} +... 
+ + +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 
4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = 
IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; 
CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, 
%84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = 
V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec 
+ $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + 
%2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 
= IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = 
IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, 
[[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} 
+ ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
%60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, 
[[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: 
[[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 
+ S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.5: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; 
CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = 
IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, 
%64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, 
implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6]] + ; CHECK-NEXT: KILL [[COPY10]], [[COPY5]], [[COPY12]], [[COPY7]], [[COPY14]], [[COPY9]], [[COPY16]], [[COPY11]], [[COPY6]], [[COPY13]], [[COPY8]], [[COPY15]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 
0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %196:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + + bb.2: + KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199 + + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.6: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], 
[[DEF14]], [[DEF17]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.6 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + 
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.6: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: 
+ ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef 
%84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + + bb.7: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.9, implicit killed $scc + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.10 + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.10: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY7]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]] + ; 
CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, 
%84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], 
[[DEF15]], [[DEF17]], [[COPY7]], [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + + bb.3: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_multiuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, 
implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, 
%3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
%60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, 
implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, 
%64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 
= IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = 
IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 384, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + 
%72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: 
successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 256, 0, implicit $exec + ; 
CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + 
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ + +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: 
successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = 
IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + 
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], 
[[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %84.sub0:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, 
[[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY6]], 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = 
IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, 
%88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, 
implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + 
%7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit 
$exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit 
$mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], 
[[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + 
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[DEF21:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF21:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF21]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY11]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + 
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 
0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY2]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
{{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, 
%61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + + bb.8: + %95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: no_copy_for_mfma +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: no_copy_for_mfma + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF21]] + ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF22]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: KILL [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %88:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %88:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.4 + + + bb.3: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir new file mode 100644 index 0000000000000..050e4bc5e941c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -0,0 +1,524 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @more_copies_than_spills() #0 { + entry: + unreachable + } + + define void @less_copies_than_spills() #0 { + entry: + unreachable + } + + define void @low_pressure() { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} +... + + +--- +name: more_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: more_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.9, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.10(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF18]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub2, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10: + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF18]].sub0, implicit $exec + ; 
CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + 
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.10, implicit killed $scc + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.10: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %85.sub0, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub2, %64, implicit $exec + + bb.11: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %85.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + + +--- +name: less_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: less_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 
%bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 
%72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: low_pressure +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: low_pressure + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %5 + ; CHECK-NEXT: S_NOP 0, implicit-def %6 + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; 
CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF5]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF6]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF12]], [[DEF6]], [[DEF7]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %5, implicit %6 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 
%85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +...