diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index 4deb2a9485e4d..3eb199bac95d1 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -34,6 +34,7 @@
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -54,6 +55,9 @@ class GCNPreRAOptimizationsImpl {
 
   bool processReg(Register Reg);
 
+  bool reconstrainRegClass(Register Reg, const TargetRegisterClass *NewRC,
+                           const GCNSubtarget &ST) const;
+
 public:
   GCNPreRAOptimizationsImpl(LiveIntervals *LS) : LIS(LS) {}
   bool run(MachineFunction &MF);
@@ -225,6 +229,52 @@ bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
   return true;
 }
 
+/// Try to narrow the register class of \p Reg to \p NewRC.
+///
+/// The constraint of every non-debug operand of \p Reg is folded into the
+/// candidate class via getRegClassConstraintEffect. The change is abandoned if
+/// no class satisfies all operands, or if \p Reg is copied to or from a
+/// register that is not a VGPR.
+///
+/// \returns true if the register class of \p Reg was changed.
+bool GCNPreRAOptimizationsImpl::reconstrainRegClass(
+    Register Reg, const TargetRegisterClass *NewRC,
+    const GCNSubtarget &ST) const {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const TargetRegisterClass *OldRC = MRI->getRegClass(Reg);
+  const TargetRegisterClass *ConstrainRC = NewRC;
+
+  // Stop early if there is nothing to do.
+  if (!NewRC || NewRC == OldRC)
+    return false;
+
+  // Accumulate constraints from all uses.
+  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+    // Apply the effect of the given operand to ConstrainRC.
+    MachineInstr *MI = MO.getParent();
+    unsigned OpNo = MO.getOperandNo();
+    ConstrainRC = MI->getRegClassConstraintEffect(OpNo, ConstrainRC, TII, TRI);
+    if (!ConstrainRC)
+      return false;
+    if (MI->isCopy()) {
+      // Only the explicit dst (0) and src (1) operands of a COPY have a
+      // counterpart. For any other (implicit) operand `1 - OpNo` would wrap
+      // around and index out of bounds, so give up conservatively.
+      if (OpNo > 1)
+        return false;
+
+      MachineOperand &OtherOp = MI->getOperand(1 - OpNo);
+      if (!OtherOp.isReg())
+        continue;
+
+      if (!TRI->isVGPR(*MRI, OtherOp.getReg()))
+        return false;
+    }
+  }
+  MRI->setRegClass(Reg, ConstrainRC);
+  return true;
+}
+
 bool GCNPreRAOptimizationsLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -245,6 +295,12 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
   TII = ST.getInstrInfo();
   MRI = &MF.getRegInfo();
   TRI = ST.getRegisterInfo();
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // Only constrain AV registers when a non-zero max ArchVGPR pressure has been
+  // recorded and it is below the number of addressable ArchVGPRs.
+  bool ConstrainAVGPRs =
+      ST.hasGFX90AInsts() && MFI->getMaxArchVGPRPressure() &&
+      (MFI->getMaxArchVGPRPressure() < ST.getAddressableNumArchVGPRs());
 
   bool Changed = false;
 
@@ -253,6 +309,15 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
     if (!LIS->hasInterval(Reg))
       continue;
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+    // If we do not need to use AGPRs to assign AVRegs, it is beneficial
+    // to constrain them to VGPR as this allows for better initial assignment
+    // (based on register bitwidth).
+    if (ConstrainAVGPRs && TRI->isVectorSuperClass(RC)) {
+      Changed |= reconstrainRegClass(Reg, TRI->getEquivalentVGPRClass(RC), ST);
+      continue;
+    }
+
     if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
         (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
       continue;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..4825b549b6796 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1130,6 +1130,16 @@ bool PreRARematStage::initGCNSchedStage() {
 }
 
 void GCNSchedStage::finalizeGCNSchedStage() {
+  // Record the highest ArchVGPR pressure found in DAG.Pressure so that later
+  // passes can query it through SIMachineFunctionInfo.
+  unsigned MaxArchVGPR = 0;
+  for (const auto &P : DAG.Pressure) {
+    if (P.getArchVGPRNum() > MaxArchVGPR)
+      MaxArchVGPR = P.getArchVGPRNum();
+  }
+
+  MF.getInfo<SIMachineFunctionInfo>()->setMaxArchVGPRPressure(MaxArchVGPR);
+
   DAG.finishBlock();
   LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
 }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 75ce67c00228d..60d90276af043 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -713,6 +713,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
       Occupancy(MFI.getOccupancy()),
+      MaxArchVGPRPressure(MFI.getMaxArchVGPRPressure()),
       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
       FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
@@ -760,6 +761,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   MaxMemoryClusterDWords = YamlMFI.MaxMemoryClusterDWords;
   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
   Occupancy = YamlMFI.Occupancy;
+  MaxArchVGPRPressure = YamlMFI.MaxArchVGPRPressure;
   IsEntryFunction = YamlMFI.IsEntryFunction;
   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
   MemoryBound = YamlMFI.MemoryBound;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 274a60adb8d07..ed10a994ad75c 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -274,6 +274,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
 
   // TODO: 10 may be a better default since it's the maximum.
   unsigned Occupancy = 0;
+  unsigned MaxArchVGPRPressure = 0;
 
   SmallVector<StringValue, 2> SpillPhysVGPRS;
   SmallVector<StringValue> WWMReservedRegs;
@@ -343,6 +344,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
     YamlIO.mapOptional("highBitsOf32BitAddress",
                        MFI.HighBitsOf32BitAddress, 0u);
     YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
+    YamlIO.mapOptional("maxArchVGPRPressure", MFI.MaxArchVGPRPressure, 0u);
     YamlIO.mapOptional("spillPhysVGPRs", MFI.SpillPhysVGPRS);
     YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
     YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
@@ -512,6 +514,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   // Current recorded maximum possible occupancy.
   unsigned Occupancy;
 
+  // The max arch VGPR pressure found during scheduling.
+  unsigned MaxArchVGPRPressure = 0;
+
   // Maximum number of dwords that can be clusterred during instruction
   // scheduler stage.
   unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
@@ -1176,6 +1181,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
     return MayNeedAGPRs;
   }
 
+  /// \returns the maximum ArchVGPR pressure recorded for this function.
+  unsigned getMaxArchVGPRPressure() const { return MaxArchVGPRPressure; }
+
+  /// Record \p NewArchVGPRPressure as the maximum ArchVGPR pressure.
+  void setMaxArchVGPRPressure(unsigned NewArchVGPRPressure) {
+    MaxArchVGPRPressure = NewArchVGPRPressure;
+  }
+
   // \returns true if a function has a use of AGPRs via inline asm or
   // has a call which may use it.
bool mayUseAGPRs(const Function &F) const; diff --git a/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir b/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir new file mode 100644 index 0000000000000..6abfb49c763dd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -run-pass=amdgpu-pre-ra-optimizations -o - %s | FileCheck -check-prefix=UNIFIED %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-pre-ra-optimizations -o - %s | FileCheck -check-prefix=SPLIT %s + +--- +name: reconstrain +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } + maxArchVGPRPressure: 2 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: reconstrain + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: reconstrain + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + S_NOP 0, implicit %0 + S_ENDPGM 0 +... 
+ +--- +name: unspecified_yaml +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: unspecified_yaml + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: unspecified_yaml + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + S_NOP 0, implicit %0 + S_ENDPGM 0 +... + +--- +name: constrain_highrp +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } + maxArchVGPRPressure: 255 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: constrain_highrp + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: constrain_highrp + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + S_NOP 0, implicit %0 + S_ENDPGM 0 +... 
+ +--- +name: no_constrain_highrp +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } + maxArchVGPRPressure: 256 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: no_constrain_highrp + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: no_constrain_highrp + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + S_NOP 0, implicit %0 + S_ENDPGM 0 +... + +--- +name: no_constrain_highrp1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } + maxArchVGPRPressure: 257 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: no_constrain_highrp1 + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: no_constrain_highrp1 + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + S_NOP 0, implicit %0 + S_ENDPGM 0 +... 
+ +--- +name: no_constrain_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } + maxArchVGPRPressure: 0 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; UNIFIED-LABEL: name: no_constrain_use + ; UNIFIED: liveins: $vgpr0, $vgpr1 + ; UNIFIED-NEXT: {{ $}} + ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; UNIFIED-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[DEF]] + ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]] + ; UNIFIED-NEXT: S_ENDPGM 0 + ; + ; SPLIT-LABEL: name: no_constrain_use + ; SPLIT: liveins: $vgpr0, $vgpr1 + ; SPLIT-NEXT: {{ $}} + ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF + ; SPLIT-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[DEF]] + ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]] + ; SPLIT-NEXT: S_ENDPGM 0 + %0:av_64_align2 = IMPLICIT_DEF + INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, %0 + S_NOP 0, implicit %0 + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll index b514c49394d21..b82bc3fb8724d 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll @@ -39,6 +39,7 @@ ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 5 +; CHECK-NEXT: maxArchVGPRPressure: 0 ; CHECK-NEXT: scavengeFI: '%stack.0' ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' @@ -308,6 +309,7 @@ ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 5 +; CHECK-NEXT: maxArchVGPRPressure: 0 ; CHECK-NEXT: scavengeFI: '%stack.0' ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll index fc730f9e88454..9c38f9ef0315e 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -39,6 +39,7 @@ ; AFTER-PEI-NEXT: fp64-fp16-output-denormals: true ; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0 ; AFTER-PEI-NEXT: occupancy: 5 +; AFTER-PEI-NEXT: maxArchVGPRPressure: 0 ; AFTER-PEI-NEXT: scavengeFI: '%stack.3' ; AFTER-PEI-NEXT: vgprForAGPRCopy: '' ; AFTER-PEI-NEXT: sgprForEXECCopy: '' diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll index 5adef1433079d..cfb44e3c11171 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll @@ -40,6 +40,7 @@ ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: 
BitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 +; CHECK-NEXT: maxArchVGPRPressure: 3 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3' diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll index fa40164aa02f0..343d4ed402296 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll @@ -40,6 +40,7 @@ ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: BitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 +; CHECK-NEXT: maxArchVGPRPressure: 3 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3' diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 24565e4423d04..b49e203a65c4a 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -49,6 +49,7 @@ # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 8 +# FULL-NEXT: maxArchVGPRPressure: 0 # FULL-NEXT: vgprForAGPRCopy: '' # FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: longBranchReservedReg: '' @@ -156,6 +157,7 @@ body: | # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 +# FULL-NEXT: maxArchVGPRPressure: 0 # FULL-NEXT: vgprForAGPRCopy: '' # FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: longBranchReservedReg: '' @@ -234,6 +236,7 @@ body: | # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 +# FULL-NEXT: maxArchVGPRPressure: 0 # FULL-NEXT: vgprForAGPRCopy: '' # FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: 
longBranchReservedReg: '' @@ -313,6 +316,7 @@ body: | # FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: occupancy: 10 +# FULL-NEXT: maxArchVGPRPressure: 0 # FULL-NEXT: vgprForAGPRCopy: '' # FULL-NEXT: sgprForEXECCopy: '' # FULL-NEXT: longBranchReservedReg: '' diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index a15271382f37d..c56dfeeab4ac6 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -50,6 +50,7 @@ ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 8 +; CHECK-NEXT: maxArchVGPRPressure: 2 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: '' @@ -99,6 +100,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 +; CHECK-NEXT: maxArchVGPRPressure: 2 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: '' @@ -172,6 +174,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 +; CHECK-NEXT: maxArchVGPRPressure: 0 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: '' @@ -227,6 +230,7 @@ define void @function() { ; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: occupancy: 10 +; CHECK-NEXT: maxArchVGPRPressure: 0 ; CHECK-NEXT: vgprForAGPRCopy: '' ; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101' ; CHECK-NEXT: longBranchReservedReg: ''