|
18 | 18 | #include "GCNSubtarget.h"
|
19 | 19 | #include "SIMachineFunctionInfo.h"
|
20 | 20 | #include "Utils/AMDGPUBaseInfo.h"
|
| 21 | +#include "llvm/ADT/STLExtras.h" |
21 | 22 | #include "llvm/Analysis/ValueTracking.h"
|
22 | 23 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
|
23 | 24 | #include "llvm/CodeGen/LiveIntervals.h"
|
@@ -5534,6 +5535,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
5534 | 5535 | }
|
5535 | 5536 | }
|
5536 | 5537 |
|
| 5538 | + // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more |
| 5539 | + // information. |
| 5540 | + if (AMDGPU::isPackedFP32Inst(Opcode) && AMDGPU::isGFX12Plus(ST)) { |
| 5541 | + for (unsigned I = 0; I < 3; ++I) { |
| 5542 | + if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I)) |
| 5543 | + return false; |
| 5544 | + } |
| 5545 | + } |
| 5546 | + |
5537 | 5547 | return true;
|
5538 | 5548 | }
|
5539 | 5549 |
|
@@ -6005,6 +6015,21 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
|
6005 | 6015 | const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
|
6006 | 6016 | unsigned Opc = MI.getOpcode();
|
6007 | 6017 |
|
| 6018 | + // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more |
| 6019 | + // information. |
| 6020 | + if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) && |
| 6021 | + MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) { |
| 6022 | + constexpr const AMDGPU::OpName OpNames[] = { |
| 6023 | + AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}; |
| 6024 | + |
| 6025 | + for (auto [I, OpName] : enumerate(OpNames)) { |
| 6026 | + int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]); |
| 6027 | + if (static_cast<unsigned>(SrcIdx) == OpIdx && |
| 6028 | + !isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I, &MO)) |
| 6029 | + return false; |
| 6030 | + } |
| 6031 | + } |
| 6032 | + |
6008 | 6033 | if (!isLegalRegOperand(MRI, OpInfo, MO))
|
6009 | 6034 | return false;
|
6010 | 6035 |
|
@@ -6053,6 +6078,39 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
|
6053 | 6078 | return true;
|
6054 | 6079 | }
|
6055 | 6080 |
|
| 6081 | +bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand( |
| 6082 | + const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, |
| 6083 | + const MachineOperand *MO) const { |
| 6084 | + constexpr const unsigned NumOps = 3; |
| 6085 | + constexpr const AMDGPU::OpName OpNames[NumOps * 2] = { |
| 6086 | + AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
| 6087 | + AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers, |
| 6088 | + AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers}; |
| 6089 | + |
| 6090 | + assert(SrcN < NumOps); |
| 6091 | + |
| 6092 | + if (!MO) { |
| 6093 | + int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]); |
| 6094 | + if (SrcIdx == -1) |
| 6095 | + return true; |
| 6096 | + MO = &MI.getOperand(SrcIdx); |
| 6097 | + } |
| 6098 | + |
| 6099 | + if (!MO->isReg() || !RI.isSGPRReg(MRI, MO->getReg())) |
| 6100 | + return true; |
| 6101 | + |
| 6102 | + int ModsIdx = |
| 6103 | + AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]); |
| 6104 | + if (ModsIdx == -1) |
| 6105 | + return true; |
| 6106 | + |
| 6107 | + unsigned Mods = MI.getOperand(ModsIdx).getImm(); |
| 6108 | + bool OpSel = Mods & SISrcMods::OP_SEL_0; |
| 6109 | + bool OpSelHi = Mods & SISrcMods::OP_SEL_1; |
| 6110 | + |
| 6111 | + return !OpSel && !OpSelHi; |
| 6112 | +} |
| 6113 | + |
6056 | 6114 | bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
|
6057 | 6115 | const MachineOperand *MO) const {
|
6058 | 6116 | const MachineFunction &MF = *MI.getParent()->getParent();
|
@@ -6390,6 +6448,15 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
|
6390 | 6448 | if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
|
6391 | 6449 | !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
|
6392 | 6450 | legalizeOpWithMove(MI, VOP3Idx[2]);
|
| 6451 | + |
| 6452 | + // Fix the register class of packed FP32 instructions on gfx12+. See |
| 6453 | + // SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information. |
| 6454 | + if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) { |
| 6455 | + for (unsigned I = 0; I < 3; ++I) { |
| 6456 | + if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, /*SrcN=*/I)) |
| 6457 | + legalizeOpWithMove(MI, VOP3Idx[I]); |
| 6458 | + } |
| 6459 | + } |
6393 | 6460 | }
|
6394 | 6461 |
|
6395 | 6462 | Register SIInstrInfo::readlaneVGPRToSGPR(
|
|
0 commit comments