diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 47329b2c2f4d2..9ea795318e9a2 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -65,6 +65,7 @@ add_llvm_target(RISCVCodeGen RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp + RISCVVectorConfigAnalysis.cpp RISCVVectorMaskDAGMutation.cpp RISCVVectorPeephole.cpp RISCVVLOptimizer.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index ae9410193efe1..a296488b41fe5 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -111,6 +111,8 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVPreLegalizerCombiner(); void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); +void initializeRISCVVectorConfigWrapperPassPass(PassRegistry &); + FunctionPass *createRISCVVLOptimizerPass(); void initializeRISCVVLOptimizerPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 90e1c47a71c89..f02106aacf3a9 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -9,23 +9,11 @@ // This file implements a function pass that inserts VSETVLI instructions where // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL // instructions. -// -// This pass consists of 3 phases: -// -// Phase 1 collects how each basic block affects VL/VTYPE. -// -// Phase 2 uses the information from phase 1 to do a data flow analysis to -// propagate the VL/VTYPE changes through the function. This gives us the -// VL/VTYPE at the start of each basic block. -// -// Phase 3 inserts VSETVLI instructions in each basic block. Information from -// phase 2 is used to prevent inserting a VSETVLI before the first vector -// instruction in the block if possible. -// //===----------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVSubtarget.h" +#include "RISCVVectorConfigAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveDebugVariables.h" @@ -49,814 +37,18 @@ static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE( namespace { -/// Given a virtual register \p Reg, return the corresponding VNInfo for it. -/// This will return nullptr if the virtual register is an implicit_def or -/// if LiveIntervals is not available. -static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI, - const LiveIntervals *LIS) { - assert(Reg.isVirtual()); - if (!LIS) - return nullptr; - auto &LI = LIS->getInterval(Reg); - SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI); - return LI.getVNInfoBefore(SI); -} - static unsigned getVLOpNum(const MachineInstr &MI) { return RISCVII::getVLOpNum(MI.getDesc()); } -static unsigned getSEWOpNum(const MachineInstr &MI) { - return RISCVII::getSEWOpNum(MI.getDesc()); -} - -/// Get the EEW for a load or store instruction. Return std::nullopt if MI is -/// not a load or store which ignores SEW.
-static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) { - switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { - default: - return std::nullopt; - case RISCV::VLE8_V: - case RISCV::VLSE8_V: - case RISCV::VSE8_V: - case RISCV::VSSE8_V: - return 8; - case RISCV::VLE16_V: - case RISCV::VLSE16_V: - case RISCV::VSE16_V: - case RISCV::VSSE16_V: - return 16; - case RISCV::VLE32_V: - case RISCV::VLSE32_V: - case RISCV::VSE32_V: - case RISCV::VSSE32_V: - return 32; - case RISCV::VLE64_V: - case RISCV::VLSE64_V: - case RISCV::VSE64_V: - case RISCV::VSSE64_V: - return 64; - } -} - -/// Return true if this is an operation on mask registers. Note that -/// this includes both arithmetic/logical ops and load/store (vlm/vsm). -static bool isMaskRegOp(const MachineInstr &MI) { - if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) - return false; - const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); - // A Log2SEW of 0 is an operation on mask registers only. - return Log2SEW == 0; -} - -/// Return true if the inactive elements in the result are entirely undefined. -/// Note that this is different from "agnostic" as defined by the vector -/// specification. Agnostic requires each lane to either be undisturbed, or -/// take the value -1; no other value is allowed. -static bool hasUndefinedPassthru(const MachineInstr &MI) { - - unsigned UseOpIdx; - if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) - // If there is no passthrough operand, then the pass through - // lanes are undefined. - return true; - - // All undefined passthrus should be $noreg: see - // RISCVDAGToDAGISel::doPeepholeNoRegPassThru - const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef(); -} - -/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs. -static bool isVectorCopy(const TargetRegisterInfo *TRI, - const MachineInstr &MI) { - return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() && - RISCVRegisterInfo::isRVVRegClass( - TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg())); -} - -/// Which subfields of VL or VTYPE have values we need to preserve? -struct DemandedFields { - // Some unknown property of VL is used. If demanded, must preserve entire - // value. - bool VLAny = false; - // Only zero vs non-zero is used. If demanded, can change non-zero values. - bool VLZeroness = false; - // What properties of SEW we need to preserve. - enum : uint8_t { - SEWEqual = 3, // The exact value of SEW needs to be preserved. - SEWGreaterThanOrEqualAndLessThan64 = - 2, // SEW can be changed as long as it's greater - // than or equal to the original value, but must be less - // than 64. - SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater - // than or equal to the original value. - SEWNone = 0 // We don't need to preserve SEW at all. - } SEW = SEWNone; - enum : uint8_t { - LMULEqual = 2, // The exact value of LMUL needs to be preserved. - LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1. - LMULNone = 0 // We don't need to preserve LMUL at all. - } LMUL = LMULNone; - bool SEWLMULRatio = false; - bool TailPolicy = false; - bool MaskPolicy = false; - // If this is true, we demand that VTYPE is set to some legal state, i.e. that - // vill is unset.
- bool VILL = false; - - // Return true if any part of VTYPE was used - bool usedVTYPE() const { - return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL; - } - - // Return true if any property of VL was used - bool usedVL() { - return VLAny || VLZeroness; - } - - // Mark all VTYPE subfields and properties as demanded - void demandVTYPE() { - SEW = SEWEqual; - LMUL = LMULEqual; - SEWLMULRatio = true; - TailPolicy = true; - MaskPolicy = true; - VILL = true; - } - - // Mark all VL properties as demanded - void demandVL() { - VLAny = true; - VLZeroness = true; - } - - static DemandedFields all() { - DemandedFields DF; - DF.demandVTYPE(); - DF.demandVL(); - return DF; - } - - // Make this the result of demanding both the fields in this and B. - void doUnion(const DemandedFields &B) { - VLAny |= B.VLAny; - VLZeroness |= B.VLZeroness; - SEW = std::max(SEW, B.SEW); - LMUL = std::max(LMUL, B.LMUL); - SEWLMULRatio |= B.SEWLMULRatio; - TailPolicy |= B.TailPolicy; - MaskPolicy |= B.MaskPolicy; - VILL |= B.VILL; - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Support for debugging, callable in GDB: V->dump() - LLVM_DUMP_METHOD void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - - /// Implement operator<<. - void print(raw_ostream &OS) const { - OS << "{"; - OS << "VLAny=" << VLAny << ", "; - OS << "VLZeroness=" << VLZeroness << ", "; - OS << "SEW="; - switch (SEW) { - case SEWEqual: - OS << "SEWEqual"; - break; - case SEWGreaterThanOrEqual: - OS << "SEWGreaterThanOrEqual"; - break; - case SEWGreaterThanOrEqualAndLessThan64: - OS << "SEWGreaterThanOrEqualAndLessThan64"; - break; - case SEWNone: - OS << "SEWNone"; - break; - }; - OS << ", "; - OS << "LMUL="; - switch (LMUL) { - case LMULEqual: - OS << "LMULEqual"; - break; - case LMULLessThanOrEqualToM1: - OS << "LMULLessThanOrEqualToM1"; - break; - case LMULNone: - OS << "LMULNone"; - break; - }; - OS << ", "; - OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; - OS << "TailPolicy=" << TailPolicy << ", "; - OS << "MaskPolicy=" << MaskPolicy << ", "; - OS << "VILL=" << VILL; - OS << "}"; - } -#endif -}; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_ATTRIBUTE_USED -inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { - DF.print(OS); - return OS; -} -#endif - -static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) { - auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL); - return Fractional || LMul == 1; -} - -/// Return true if moving from CurVType to NewVType is -/// indistinguishable from the perspective of an instruction (or set -/// of instructions) which use only the Used subfields and properties. 
-static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType, - const DemandedFields &Used) { - switch (Used.SEW) { - case DemandedFields::SEWNone: - break; - case DemandedFields::SEWEqual: - if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqual: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType)) - return false; - break; - case DemandedFields::SEWGreaterThanOrEqualAndLessThan64: - if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) || - RISCVVType::getSEW(NewVType) >= 64) - return false; - break; - } - - switch (Used.LMUL) { - case DemandedFields::LMULNone: - break; - case DemandedFields::LMULEqual: - if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType)) - return false; - break; - case DemandedFields::LMULLessThanOrEqualToM1: - if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType))) - return false; - break; - } - - if (Used.SEWLMULRatio) { - auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType), - RISCVVType::getVLMUL(CurVType)); - auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType), - RISCVVType::getVLMUL(NewVType)); - if (Ratio1 != Ratio2) - return false; - } - - if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) != - RISCVVType::isTailAgnostic(NewVType)) - return false; - if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) != - RISCVVType::isMaskAgnostic(NewVType)) - return false; - return true; -} - -/// Return the fields and properties demanded by the provided instruction. -DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { - // This function works in coalesceVSETVLI too. We can still use the value of a - // SEW, VL, or Policy operand even though it might not be the exact value in - // the VL or VTYPE, since we only care about what the instruction originally - // demanded. - - // Most instructions don't use any of these subfields. - DemandedFields Res; - // Start conservative if registers are used - if (MI.isCall() || MI.isInlineAsm() || - MI.readsRegister(RISCV::VL, /*TRI=*/nullptr)) - Res.demandVL(); - if (MI.isCall() || MI.isInlineAsm() || - MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr)) - Res.demandVTYPE(); - // Start conservative on the unlowered form too - uint64_t TSFlags = MI.getDesc().TSFlags; - if (RISCVII::hasSEWOp(TSFlags)) { - Res.demandVTYPE(); - if (RISCVII::hasVLOp(TSFlags)) - if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); - !VLOp.isReg() || !VLOp.isUndef()) - Res.demandVL(); - - // Behavior is independent of mask policy. - if (!RISCVII::usesMaskPolicy(TSFlags)) - Res.MaskPolicy = false; - } - - // Loads and stores with implicit EEW do not demand SEW or LMUL directly. - // They instead demand the ratio of the two which is used in computing - // EMUL, but which allows us the flexibility to change SEW and LMUL - // provided we don't change the ratio. - // Note: We assume that the instruction's initial SEW is the EEW encoded - // in the opcode. This is asserted when constructing the VSETVLIInfo. - if (getEEWForLoadStore(MI)) { - Res.SEW = DemandedFields::SEWNone; - Res.LMUL = DemandedFields::LMULNone; - } - - // Store instructions don't use the policy fields. - if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) { - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - // If this is a mask reg operation, it only cares about VLMAX.
- // TODO: Possible extensions to this logic - // * Probably ok if available VLMax is larger than demanded - // * The policy bits can probably be ignored. - if (isMaskRegOp(MI)) { - Res.SEW = DemandedFields::SEWNone; - Res.LMUL = DemandedFields::LMULNone; - } - - // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0. - if (RISCVInstrInfo::isScalarInsertInstr(MI)) { - Res.LMUL = DemandedFields::LMULNone; - Res.SEWLMULRatio = false; - Res.VLAny = false; - // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't - // need to preserve any other bits and are thus compatible with any larger - // etype and can disregard policy bits. Warning: It's tempting to try doing - // this for any tail agnostic operation, but we can't as TA requires - // tail lanes to either be the original value or -1. We are writing - // unknown bits to the lanes here. - if (hasUndefinedPassthru(MI)) { - if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) && - !ST->hasVInstructionsF64()) - Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; - else - Res.SEW = DemandedFields::SEWGreaterThanOrEqual; - Res.TailPolicy = false; - } - } - - // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW. - if (RISCVInstrInfo::isScalarExtractInstr(MI)) { - assert(!RISCVII::hasVLOp(TSFlags)); - Res.LMUL = DemandedFields::LMULNone; - Res.SEWLMULRatio = false; - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) { - const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); - // A slidedown/slideup with an *undefined* passthru can freely clobber - // elements not copied from the source vector (e.g. masked off, tail, or - // slideup's prefix). Notes: - // * We can't modify SEW here since the slide amount is in units of SEW. - // * VL=1 is special only because we have existing support for zero vs - // non-zero VL. We could generalize this if we had a VL > C predicate. - // * The LMUL1 restriction is for machines whose latency may depend on LMUL. - // * As above, this is only legal for tail "undefined" not "agnostic". - // * We avoid increasing vl if the subtarget has +vl-dependent-latency - if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() && - VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && - !ST->hasVLDependentLatency()) { - Res.VLAny = false; - Res.VLZeroness = true; - Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1; - Res.TailPolicy = false; - } - - // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the - // same semantically as vmv.s.x. This is particularly useful since we don't - // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in - // its place. Since a splat is non-constant time in LMUL, we do need to be - // careful to not increase the number of active vector registers (unlike for - // vmv.s.x.) - if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() && - VLOp.getImm() == 1 && hasUndefinedPassthru(MI) && - !ST->hasVLDependentLatency()) { - Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1; - Res.SEWLMULRatio = false; - Res.VLAny = false; - if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) && - !ST->hasVInstructionsF64()) - Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64; - else - Res.SEW = DemandedFields::SEWGreaterThanOrEqual; - Res.TailPolicy = false; - } - } - - // In §32.16.6, whole vector register moves have a dependency on SEW.
At the - // MIR level though we don't encode the element type, and it gives the same - // result whatever the SEW may be. - // - // However it does need valid SEW, i.e. vill must be cleared. The entry to a - // function, calls and inline assembly may all set it, so make sure we clear - // it for whole register copies. Do this by leaving VILL demanded. - if (isVectorCopy(ST->getRegisterInfo(), MI)) { - Res.LMUL = DemandedFields::LMULNone; - Res.SEW = DemandedFields::SEWNone; - Res.SEWLMULRatio = false; - Res.TailPolicy = false; - Res.MaskPolicy = false; - } - - if (RISCVInstrInfo::isVExtractInstr(MI)) { - assert(!RISCVII::hasVLOp(TSFlags)); - // TODO: LMUL can be any larger value (without cost) - Res.TailPolicy = false; - } - - return Res; -} - -/// Defines the abstract state with which the forward dataflow models the -/// values of the VL and VTYPE registers after insertion. -class VSETVLIInfo { - struct AVLDef { - // Every AVLDef should have a VNInfo, unless we're running without - // LiveIntervals in which case this will be nullptr. - const VNInfo *ValNo; - Register DefReg; - }; - union { - AVLDef AVLRegDef; - unsigned AVLImm; - }; - - enum : uint8_t { - Uninitialized, - AVLIsReg, - AVLIsImm, - AVLIsVLMAX, - Unknown, // AVL and VTYPE are fully unknown - } State = Uninitialized; - - // Fields from VTYPE. - RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1; - uint8_t SEW = 0; - uint8_t TailAgnostic : 1; - uint8_t MaskAgnostic : 1; - uint8_t SEWLMULRatioOnly : 1; - -public: - VSETVLIInfo() - : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), - SEWLMULRatioOnly(false) {} - - static VSETVLIInfo getUnknown() { - VSETVLIInfo Info; - Info.setUnknown(); - return Info; - } - - bool isValid() const { return State != Uninitialized; } - void setUnknown() { State = Unknown; } - bool isUnknown() const { return State == Unknown; } - - void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) { - assert(AVLReg.isVirtual()); - AVLRegDef.ValNo = VNInfo; - AVLRegDef.DefReg = AVLReg; - State = AVLIsReg; - } - - void setAVLImm(unsigned Imm) { - AVLImm = Imm; - State = AVLIsImm; - } - - void setAVLVLMAX() { State = AVLIsVLMAX; } - - bool hasAVLImm() const { return State == AVLIsImm; } - bool hasAVLReg() const { return State == AVLIsReg; } - bool hasAVLVLMAX() const { return State == AVLIsVLMAX; } - Register getAVLReg() const { - assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); - return AVLRegDef.DefReg; - } - unsigned getAVLImm() const { - assert(hasAVLImm()); - return AVLImm; - } - const VNInfo *getAVLVNInfo() const { - assert(hasAVLReg()); - return AVLRegDef.ValNo; - } - // Most AVLIsReg infos will have a single defining MachineInstr, unless it was - // a PHI node. In that case getAVLVNInfo()->def will point to the block - // boundary slot and this will return nullptr. If LiveIntervals isn't - // available, nullptr is also returned. 
- const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const { - assert(hasAVLReg()); - if (!LIS || getAVLVNInfo()->isPHIDef()) - return nullptr; - auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def); - assert(MI); - return MI; - } - - void setAVL(const VSETVLIInfo &Info) { - assert(Info.isValid()); - if (Info.isUnknown()) - setUnknown(); - else if (Info.hasAVLReg()) - setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); - else if (Info.hasAVLVLMAX()) - setAVLVLMAX(); - else { - assert(Info.hasAVLImm()); - setAVLImm(Info.getAVLImm()); - } - } - - unsigned getSEW() const { return SEW; } - RISCVVType::VLMUL getVLMUL() const { return VLMul; } - bool getTailAgnostic() const { return TailAgnostic; } - bool getMaskAgnostic() const { return MaskAgnostic; } - - bool hasNonZeroAVL(const LiveIntervals *LIS) const { - if (hasAVLImm()) - return getAVLImm() > 0; - if (hasAVLReg()) { - if (auto *DefMI = getAVLDefMI(LIS)) - return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI); - } - if (hasAVLVLMAX()) - return true; - return false; - } - - bool hasEquallyZeroAVL(const VSETVLIInfo &Other, - const LiveIntervals *LIS) const { - if (hasSameAVL(Other)) - return true; - return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS)); - } - - bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const { - if (hasAVLReg() && Other.hasAVLReg()) { - assert(!getAVLVNInfo() == !Other.getAVLVNInfo() && - "we either have intervals or we don't"); - if (!getAVLVNInfo()) - return getAVLReg() == Other.getAVLReg(); - return getAVLVNInfo()->id == Other.getAVLVNInfo()->id && - getAVLReg() == Other.getAVLReg(); - } - - if (hasAVLImm() && Other.hasAVLImm()) - return getAVLImm() == Other.getAVLImm(); - - if (hasAVLVLMAX()) - return Other.hasAVLVLMAX() && hasSameVLMAX(Other); - - return false; - } - - // Return true if the two lattice values are guaranteed to have - // the same AVL value at runtime. - bool hasSameAVL(const VSETVLIInfo &Other) const { - // Without LiveIntervals, we don't know which instruction defines a - // register. Since a register may be redefined, this means all AVLIsReg - // states must be treated as possibly distinct. 
- if (hasAVLReg() && Other.hasAVLReg()) { - assert(!getAVLVNInfo() == !Other.getAVLVNInfo() && - "we either have intervals or we don't"); - if (!getAVLVNInfo()) - return false; - } - return hasSameAVLLatticeValue(Other); - } - - void setVTYPE(unsigned VType) { - assert(isValid() && !isUnknown() && - "Can't set VTYPE for uninitialized or unknown"); - VLMul = RISCVVType::getVLMUL(VType); - SEW = RISCVVType::getSEW(VType); - TailAgnostic = RISCVVType::isTailAgnostic(VType); - MaskAgnostic = RISCVVType::isMaskAgnostic(VType); - } - void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) { - assert(isValid() && !isUnknown() && - "Can't set VTYPE for uninitialized or unknown"); - VLMul = L; - SEW = S; - TailAgnostic = TA; - MaskAgnostic = MA; - } - - void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; } - - unsigned encodeVTYPE() const { - assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && - "Can't encode VTYPE for uninitialized or unknown"); - return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); - } - - bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } - - bool hasSameVTYPE(const VSETVLIInfo &Other) const { - assert(isValid() && Other.isValid() && - "Can't compare invalid VSETVLIInfos"); - assert(!isUnknown() && !Other.isUnknown() && - "Can't compare VTYPE in unknown state"); - assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && - "Can't compare when only LMUL/SEW ratio is valid."); - return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == - std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, - Other.MaskAgnostic); - } - - unsigned getSEWLMULRatio() const { - assert(isValid() && !isUnknown() && - "Can't use VTYPE for uninitialized or unknown"); - return RISCVVType::getSEWLMULRatio(SEW, VLMul); - } - - // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX. - // Note that having the same VLMAX ensures that both share the same - // function from AVL to VL; that is, they must produce the same VL value - // for any given AVL value. - bool hasSameVLMAX(const VSETVLIInfo &Other) const { - assert(isValid() && Other.isValid() && - "Can't compare invalid VSETVLIInfos"); - assert(!isUnknown() && !Other.isUnknown() && - "Can't compare VTYPE in unknown state"); - return getSEWLMULRatio() == Other.getSEWLMULRatio(); - } - - bool hasCompatibleVTYPE(const DemandedFields &Used, - const VSETVLIInfo &Require) const { - return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used); - } - - // Determine whether the vector instruction's requirements represented by - // Require are compatible with the previous vsetvli instruction represented - // by this. MI is the instruction whose requirements we're considering. - bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require, - const LiveIntervals *LIS) const { - assert(isValid() && Require.isValid() && - "Can't compare invalid VSETVLIInfos"); - // Nothing is compatible with Unknown. - if (isUnknown() || Require.isUnknown()) - return false; - - // If only our VLMAX ratio is valid, then this isn't compatible. - if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly) - return false; - - if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) - return false; - - if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS)) - return false; - - return hasCompatibleVTYPE(Used, Require); - } - - bool operator==(const VSETVLIInfo &Other) const { - // Uninitialized is only equal to another Uninitialized.
- if (!isValid()) - return !Other.isValid(); - if (!Other.isValid()) - return !isValid(); - - // Unknown is only equal to another Unknown. - if (isUnknown()) - return Other.isUnknown(); - if (Other.isUnknown()) - return isUnknown(); - - if (!hasSameAVLLatticeValue(Other)) - return false; - - // If the SEWLMULRatioOnly bits are different, then they aren't equal. - if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly) - return false; - - // If only the VLMAX is valid, check that it is the same. - if (SEWLMULRatioOnly) - return hasSameVLMAX(Other); - - // If the full VTYPE is valid, check that it is the same. - return hasSameVTYPE(Other); - } - - bool operator!=(const VSETVLIInfo &Other) const { - return !(*this == Other); - } - - // Calculate the VSETVLIInfo visible to a block assuming this and Other are - // both predecessors. - VSETVLIInfo intersect(const VSETVLIInfo &Other) const { - // If the new value isn't valid, ignore it. - if (!Other.isValid()) - return *this; - - // If this value isn't valid, this must be the first predecessor, use it. - if (!isValid()) - return Other; - - // If either is unknown, the result is unknown. - if (isUnknown() || Other.isUnknown()) - return VSETVLIInfo::getUnknown(); - - // If we have an exact match, return this. - if (*this == Other) - return *this; - - // Not an exact match, but maybe the AVL and VLMAX are the same. If so, - // return an SEW/LMUL ratio only value. - if (hasSameAVL(Other) && hasSameVLMAX(Other)) { - VSETVLIInfo MergeInfo = *this; - MergeInfo.SEWLMULRatioOnly = true; - return MergeInfo; - } - - // Otherwise the result is unknown. - return VSETVLIInfo::getUnknown(); - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Support for debugging, callable in GDB: V->dump() - LLVM_DUMP_METHOD void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - - /// Implement operator<<. - /// @{ - void print(raw_ostream &OS) const { - OS << "{"; - if (!isValid()) - OS << "Uninitialized"; - if (isUnknown()) - OS << "unknown"; - if (hasAVLReg()) - OS << "AVLReg=" << llvm::printReg(getAVLReg()); - if (hasAVLImm()) - OS << "AVLImm=" << (unsigned)AVLImm; - if (hasAVLVLMAX()) - OS << "AVLVLMAX"; - OS << ", "; - - unsigned LMul; - bool Fractional; - std::tie(LMul, Fractional) = decodeVLMUL(VLMul); - - OS << "VLMul="; - if (Fractional) - OS << "mf"; - else - OS << "m"; - OS << LMul << ", " - << "SEW=e" << (unsigned)SEW << ", " - << "TailAgnostic=" << (bool)TailAgnostic << ", " - << "MaskAgnostic=" << (bool)MaskAgnostic << ", " - << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; - } -#endif -}; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_ATTRIBUTE_USED -inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) { - V.print(OS); - return OS; -} -#endif - -struct BlockData { - // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this - // block. Calculated in Phase 2. - VSETVLIInfo Exit; - - // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor - // blocks. Calculated in Phase 2, and used by Phase 3. - VSETVLIInfo Pred; - - // Keeps track of whether the block is already in the queue. - bool InQueue = false; - - BlockData() = default; -}; - class RISCVInsertVSETVLI : public MachineFunctionPass { + RISCVVectorConfigInfo *VConfig; const RISCVSubtarget *ST; const TargetInstrInfo *TII; MachineRegisterInfo *MRI;
 // Possibly null!
LiveIntervals *LIS; - std::vector<BlockData> BlockInfo; std::queue<const MachineBasicBlock *> WorkList; public: @@ -873,6 +65,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired<RISCVVectorConfigWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -880,30 +73,17 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } private: - bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require, - const VSETVLIInfo &CurInfo) const; bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB) const; void insertVSETVLI(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc DL, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo); - void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const; - void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const; - bool computeVLVTYPEChanges(const MachineBasicBlock &MBB, - VSETVLIInfo &Info) const; - void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); void emitVSETVLIs(MachineBasicBlock &MBB); void doPRE(MachineBasicBlock &MBB); void insertReadVL(MachineBasicBlock &MBB); - bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI, - const DemandedFields &Used) const; void coalesceVSETVLIs(MachineBasicBlock &MBB) const; - - VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const; - VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const; - void forwardVSETVLIAVL(VSETVLIInfo &Info) const; }; } // end anonymous namespace @@ -911,8 +91,11 @@ class RISCVInsertVSETVLI : public MachineFunctionPass { char RISCVInsertVSETVLI::ID = 0; char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID; -INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, - false, false) +INITIALIZE_PASS_BEGIN(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, + false, false) +INITIALIZE_PASS_DEPENDENCY(RISCVVectorConfigWrapperPass) +INITIALIZE_PASS_END(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, + false, false) // If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can // replace the AVL operand with the AVL of the defining vsetvli. E.g. @@ -922,132 +105,6 @@ INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME, // -> // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1 // $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1 -void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const { - if (!Info.hasAVLReg()) - return; - const MachineInstr *DefMI = Info.getAVLDefMI(LIS); - if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI)) - return; - VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI); - if (!DefInstrInfo.hasSameVLMAX(Info)) - return; - Info.setAVL(DefInstrInfo); -} - -// Return a VSETVLIInfo representing the changes made by this VSETVLI or -// VSETIVLI instruction. -VSETVLIInfo -RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { - VSETVLIInfo NewInfo; - if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { - NewInfo.setAVLImm(MI.getOperand(1).getImm()); - } else { - assert(MI.getOpcode() == RISCV::PseudoVSETVLI || - MI.getOpcode() == RISCV::PseudoVSETVLIX0); - if (MI.getOpcode() == RISCV::PseudoVSETVLIX0) - NewInfo.setAVLVLMAX(); - else if (MI.getOperand(1).isUndef()) - // Otherwise use an AVL of 1 to avoid depending on previous vl.
- NewInfo.setAVLImm(1); - else { - Register AVLReg = MI.getOperand(1).getReg(); - VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS); - NewInfo.setAVLRegDef(VNI, AVLReg); - } - } - NewInfo.setVTYPE(MI.getOperand(2).getImm()); - - forwardVSETVLIAVL(NewInfo); - - return NewInfo; -} - -static unsigned computeVLMAX(unsigned VLEN, unsigned SEW, - RISCVVType::VLMUL VLMul) { - auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul); - if (Fractional) - VLEN = VLEN / LMul; - else - VLEN = VLEN * LMul; - return VLEN/SEW; -} - -VSETVLIInfo -RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { - VSETVLIInfo InstrInfo; - const uint64_t TSFlags = MI.getDesc().TSFlags; - - bool TailAgnostic = true; - bool MaskAgnostic = true; - if (!hasUndefinedPassthru(MI)) { - // Start with undisturbed. - TailAgnostic = false; - MaskAgnostic = false; - - // If there is a policy operand, use it. - if (RISCVII::hasVecPolicyOp(TSFlags)) { - const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1); - uint64_t Policy = Op.getImm(); - assert(Policy <= - (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) && - "Invalid Policy Value"); - TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC; - MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC; - } - - if (!RISCVII::usesMaskPolicy(TSFlags)) - MaskAgnostic = true; - } - - RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags); - - unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm(); - // A Log2SEW of 0 is an operation on mask registers only. - unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; - assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); - - if (RISCVII::hasVLOp(TSFlags)) { - const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); - if (VLOp.isImm()) { - int64_t Imm = VLOp.getImm(); - // Convert the VLMax sentinel to X0 register. - if (Imm == RISCV::VLMaxSentinel) { - // If we know the exact VLEN, see if we can use the constant encoding - // for the VLMAX instead. This reduces register pressure slightly. - const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul); - if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31) - InstrInfo.setAVLImm(VLMAX); - else - InstrInfo.setAVLVLMAX(); - } - else - InstrInfo.setAVLImm(Imm); - } else if (VLOp.isUndef()) { - // Otherwise use an AVL of 1 to avoid depending on previous vl.
- InstrInfo.setAVLImm(1); - } else { - VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS); - InstrInfo.setAVLRegDef(VNI, VLOp.getReg()); - } - } else { - assert(RISCVInstrInfo::isScalarExtractInstr(MI) || - RISCVInstrInfo::isVExtractInstr(MI)); - // Pick a random value for state tracking purposes, will be ignored via - // the demanded fields mechanism - InstrInfo.setAVLImm(1); - } -#ifndef NDEBUG - if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) { - assert(SEW == EEW && "Initial SEW doesn't match expected EEW"); - } -#endif - InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); - - forwardVSETVLIAVL(InstrInfo); - - return InstrInfo; -} - void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc DL, const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) { @@ -1073,7 +130,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) { if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS); DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + VSETVLIInfo DefInfo = VConfig->getInfoForVSETVLI(*DefMI); if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) { auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0)) @@ -1150,208 +207,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, } } -/// Return true if a VSETVLI is required to transition from CurInfo to Require -/// given a set of DemandedFields \p Used. -bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used, - const VSETVLIInfo &Require, - const VSETVLIInfo &CurInfo) const { - if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly()) - return true; - - if (CurInfo.isCompatible(Used, Require, LIS)) - return false; - - return true; -} - -// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we -// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more -// places. -static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo, - const VSETVLIInfo &NewInfo, - DemandedFields &Demanded) { - VSETVLIInfo Info = NewInfo; - - if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() && - !PrevInfo.isUnknown()) { - if (auto NewVLMul = RISCVVType::getSameRatioLMUL( - PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW())) - Info.setVLMul(*NewVLMul); - Demanded.LMUL = DemandedFields::LMULEqual; - } - - return Info; -} - -// Given an incoming state reaching MI, minimally modifies that state so that it -// is compatible with MI. The resulting state is guaranteed to be semantically -// legal for MI, but may not be the state requested by MI. -void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, - const MachineInstr &MI) const { - if (isVectorCopy(ST->getRegisterInfo(), MI) && - (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) { - // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may - // be coalesced into another vsetvli since we won't demand any fields.
- VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly - NewInfo.setAVLImm(1); - NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true); - Info = NewInfo; - return; - } - - if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) - return; - - DemandedFields Demanded = getDemanded(MI, ST); - - const VSETVLIInfo NewInfo = computeInfoForInstr(MI); - assert(NewInfo.isValid() && !NewInfo.isUnknown()); - if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info)) - return; - - const VSETVLIInfo PrevInfo = Info; - if (!Info.isValid() || Info.isUnknown()) - Info = NewInfo; - - const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded); - - // If MI only demands that VL has the same zeroness, we only need to set the - // AVL if the zeroness differs. This removes a vsetvli entirely if the types - // match or allows use of a cheaper AVL-preserving variant if VLMAX doesn't - // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype' - // variant, so we avoid the transform to prevent extending the live range of - // an AVL register operand. - // TODO: We can probably relax this for immediates. - bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) && - IncomingInfo.hasSameVLMAX(PrevInfo); - if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero)) - Info.setAVL(IncomingInfo); - - Info.setVTYPE( - ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info) - .getVLMUL(), - ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(), - // Prefer tail/mask agnostic since it can be relaxed to undisturbed later - // if needed. - (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() || - IncomingInfo.getTailAgnostic(), - (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() || - IncomingInfo.getMaskAgnostic()); - - // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep - // the AVL. - if (Info.hasSEWLMULRatioOnly()) { - VSETVLIInfo RatiolessInfo = IncomingInfo; - RatiolessInfo.setAVL(Info); - Info = RatiolessInfo; - } -} - -// Given a state with which we evaluated MI (see transferBefore above for why -// this might be different than the state MI requested), modify the state to -// reflect the changes MI might make. -void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, - const MachineInstr &MI) const { - if (RISCVInstrInfo::isVectorConfigInstr(MI)) { - Info = getInfoForVSETVLI(MI); - return; - } - - if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) { - // Update AVL to the vl output of the fault-only-first load. - assert(MI.getOperand(1).getReg().isVirtual()); - if (LIS) { - auto &LI = LIS->getInterval(MI.getOperand(1).getReg()); - SlotIndex SI = - LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot(); - VNInfo *VNI = LI.getVNInfoAt(SI); - Info.setAVLRegDef(VNI, MI.getOperand(1).getReg()); - } else - Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg()); - return; - } - - // If this is something that updates VL/VTYPE that we don't know about, set - // the state to unknown.
- if (MI.isCall() || MI.isInlineAsm() || - MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || - MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) - Info = VSETVLIInfo::getUnknown(); -} - -bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, - VSETVLIInfo &Info) const { - bool HadVectorOp = false; - - Info = BlockInfo[MBB.getNumber()].Pred; - for (const MachineInstr &MI : MBB) { - transferBefore(Info, MI); - - if (RISCVInstrInfo::isVectorConfigInstr(MI) || - RISCVII::hasSEWOp(MI.getDesc().TSFlags) || - isVectorCopy(ST->getRegisterInfo(), MI)) - HadVectorOp = true; - - transferAfter(Info, MI); - } - - return HadVectorOp; -} - -void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { - - BlockData &BBInfo = BlockInfo[MBB.getNumber()]; - - BBInfo.InQueue = false; - - // Start with the previous entry so that we keep the most conservative state - // we have ever found. - VSETVLIInfo InInfo = BBInfo.Pred; - if (MBB.pred_empty()) { - // There are no predecessors, so use the default starting status. - InInfo.setUnknown(); - } else { - for (MachineBasicBlock *P : MBB.predecessors()) - InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit); - } - - // If we don't have any valid predecessor value, wait until we do. - if (!InInfo.isValid()) - return; - - // If no change, no need to rerun block - if (InInfo == BBInfo.Pred) - return; - - BBInfo.Pred = InInfo; - LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB) - << " changed to " << BBInfo.Pred << "\n"); - - // Note: It's tempting to cache the state changes here, but due to the - // compatibility checks performed a block's output state can change based on - // the input state. To cache, we'd have to add logic for finding - // never-compatible state changes. - VSETVLIInfo TmpStatus; - computeVLVTYPEChanges(MBB, TmpStatus); - - // If the new exit value matches the old exit value, we don't need to revisit - // any blocks. - if (BBInfo.Exit == TmpStatus) - return; - - BBInfo.Exit = TmpStatus; - LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB) - << " changed to " << BBInfo.Exit << "\n"); - - // Add the successors to the work list so we can propagate the changed exit - // status. - for (MachineBasicBlock *S : MBB.successors()) - if (!BlockInfo[S->getNumber()].InQueue) { - BlockInfo[S->getNumber()].InQueue = true; - WorkList.push(S); - } -} - // If we weren't able to prove a vsetvli was directly unneeded, it might still // be unneeded if the AVL was a phi node where all incoming values are VL // outputs from the last VSETVLI in their respective basic blocks. @@ -1371,7 +226,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, const LiveRange &LR = LIS->getInterval(Require.getAVLReg()); for (auto *PBB : MBB.predecessors()) { - const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; + const VSETVLIInfo &PBBExit = VConfig->getInfo()[PBB->getNumber()].Exit; // We need the PHI input to be the output of a VSET(I)VLI. const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB)); @@ -1383,7 +238,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, // We found a VSET(I)VLI; make sure it matches the output of the // predecessor block.
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + VSETVLIInfo DefInfo = VConfig->getInfoForVSETVLI(*DefMI); if (DefInfo != PBBExit) return true; @@ -1400,13 +255,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, } void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { - VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred; + VSETVLIInfo CurInfo = VConfig->getInfo()[MBB.getNumber()].Pred; // Track whether the prefix of the block we've scanned is transparent // (meaning has not yet changed the abstract state). bool PrefixTransparent = true; for (MachineInstr &MI : MBB) { const VSETVLIInfo PrevInfo = CurInfo; - transferBefore(CurInfo, MI); + VConfig->transferBefore(CurInfo, MI); // If this is an explicit VSETVLI or VSETIVLI, update our state. if (RISCVInstrInfo::isVectorConfigInstr(MI)) { @@ -1420,7 +275,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { } if (EnsureWholeVectorRegisterMoveValidVTYPE && - isVectorCopy(ST->getRegisterInfo(), MI)) { + RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI)) { if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) { insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo); PrefixTransparent = false; @@ -1492,10 +347,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) PrefixTransparent = false; - transferAfter(CurInfo, MI); + VConfig->transferAfter(CurInfo, MI); } - const auto &Info = BlockInfo[MBB.getNumber()]; + const auto &Info = VConfig->getInfo()[MBB.getNumber()]; if (CurInfo != Info.Exit) { LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"); LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n"); @@ -1511,13 +366,13 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { /// this is geared to catch the common case of a fixed length vsetvl in a single /// block loop when it could execute once in the preheader instead. void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { - if (!BlockInfo[MBB.getNumber()].Pred.isUnknown()) + if (!VConfig->getInfo()[MBB.getNumber()].Pred.isUnknown()) return; MachineBasicBlock *UnavailablePred = nullptr; VSETVLIInfo AvailableInfo; for (MachineBasicBlock *P : MBB.predecessors()) { - const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit; + const VSETVLIInfo &PredInfo = VConfig->getInfo()[P->getNumber()].Exit; if (PredInfo.isUnknown()) { if (UnavailablePred) return; @@ -1568,20 +423,20 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { // not make this change without reflowing later blocks as well. // 2) If we don't actually remove a transition, inserting a vsetvli // into the predecessor block would be correct, but unprofitable. - VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred; + VSETVLIInfo OldInfo = VConfig->getInfo()[MBB.getNumber()].Pred; VSETVLIInfo CurInfo = AvailableInfo; int TransitionsRemoved = 0; for (const MachineInstr &MI : MBB) { const VSETVLIInfo LastInfo = CurInfo; const VSETVLIInfo LastOldInfo = OldInfo; - transferBefore(CurInfo, MI); - transferBefore(OldInfo, MI); + VConfig->transferBefore(CurInfo, MI); + VConfig->transferBefore(OldInfo, MI); if (CurInfo == LastInfo) TransitionsRemoved++; if (LastOldInfo == OldInfo) TransitionsRemoved--; - transferAfter(CurInfo, MI); - transferAfter(OldInfo, MI); + VConfig->transferAfter(CurInfo, MI); + VConfig->transferAfter(OldInfo, MI); if (CurInfo == OldInfo) // Convergence. All transitions after this must match by construction. 
break; @@ -1593,12 +448,12 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { // Finally, update both data flow state and insert the actual vsetvli. // Doing both keeps the code in sync with the dataflow results, which // is critical for correctness of phase 3. - auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit; + auto OldExit = VConfig->getInfo()[UnavailablePred->getNumber()].Exit; LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to " << UnavailablePred->getName() << " with state " << AvailableInfo << "\n"); - BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo; - BlockInfo[MBB.getNumber()].Pred = AvailableInfo; + VConfig->getInfo()[UnavailablePred->getNumber()].Exit = AvailableInfo; + VConfig->getInfo()[MBB.getNumber()].Pred = AvailableInfo; // Note there's an implicit assumption here that terminators never use // or modify VL or VTYPE. Also, fallthrough will return end(). @@ -1608,44 +463,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldExit); } -// Return true if we can mutate PrevMI to match MI without changing any of the -// fields which would be observed. -bool RISCVInsertVSETVLI::canMutatePriorConfig( - const MachineInstr &PrevMI, const MachineInstr &MI, - const DemandedFields &Used) const { - // If the VL values aren't equal, return false if either a) the former is - // demanded, or b) we can't rewrite the former to be the latter for - // implementation reasons. - if (!RISCVInstrInfo::isVLPreservingConfig(MI)) { - if (Used.VLAny) - return false; - - if (Used.VLZeroness) { - if (RISCVInstrInfo::isVLPreservingConfig(PrevMI)) - return false; - if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), - LIS)) - return false; - } - - auto &AVL = MI.getOperand(1); - - // If the AVL is a register, we need to make sure its definition is the same - // at PrevMI as it was at MI. - if (AVL.isReg() && AVL.getReg() != RISCV::X0) { - VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS); - VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS); - if (!VNI || !PrevVNI || VNI != PrevVNI) - return false; - } - } - - assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm()); - auto PriorVType = PrevMI.getOperand(2).getImm(); - auto VType = MI.getOperand(2).getImm(); - return areCompatibleVTYPEs(PriorVType, VType, Used); -} - void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { MachineInstr *NextMI = nullptr; // We can have arbitrary code in successors, so VL and VTYPE @@ -1673,7 +490,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { if (!RISCVInstrInfo::isVectorConfigInstr(MI)) { - Used.doUnion(getDemanded(MI, ST)); + Used.doUnion(RISCVVectorConfigInfo::getDemanded(MI, ST)); if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) @@ -1695,7 +512,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { continue; } - if (canMutatePriorConfig(MI, *NextMI, Used)) { + if (VConfig->canMutatePriorConfig(MI, *NextMI, Used)) { if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) { Register DefReg = NextMI->getOperand(0).getReg(); @@ -1743,7 +560,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const { } } NextMI = &MI; - Used = getDemanded(MI, ST); + Used = RISCVVectorConfigInfo::getDemanded(MI, ST); } // Loop over the dead AVL values, and delete them now.
This has @@ -1796,51 +613,14 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; + VConfig = &getAnalysis<RISCVVectorConfigWrapperPass>().getResult(); - assert(BlockInfo.empty() && "Expect empty block infos"); - BlockInfo.resize(MF.getNumBlockIDs()); - - bool HaveVectorOp = false; - - // Phase 1 - determine how VL/VTYPE are affected by each block. - for (const MachineBasicBlock &MBB : MF) { - VSETVLIInfo TmpStatus; - HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus); - // Initial exit state is whatever change we found in the block. - BlockData &BBInfo = BlockInfo[MBB.getNumber()]; - BBInfo.Exit = TmpStatus; - LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB) - << " is " << BBInfo.Exit << "\n"); - - } - - // If we didn't find any instructions that need VSETVLI, we're done. - if (!HaveVectorOp) { - BlockInfo.clear(); + if (!VConfig->haveVectorOp()) return false; - } - - // Phase 2 - determine the exit VL/VTYPE from each block. We add all - // blocks to the list here, but will also add any that need to be revisited - // during Phase 2 processing. - for (const MachineBasicBlock &MBB : MF) { - WorkList.push(&MBB); - BlockInfo[MBB.getNumber()].InQueue = true; - } - while (!WorkList.empty()) { - const MachineBasicBlock &MBB = *WorkList.front(); - WorkList.pop(); - computeIncomingVLVTYPE(MBB); - } - // Perform partial redundancy elimination of vsetvli transitions. for (MachineBasicBlock &MBB : MF) doPRE(MBB); - // Phase 3 - add any vsetvli instructions needed in the block. Use the - // Phase 2 information to avoid adding vsetvlis before the first vector - // instruction in the block if the VL/VTYPE is satisfied by its - // predecessors. for (MachineBasicBlock &MBB : MF) emitVSETVLIs(MBB); @@ -1861,8 +641,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) insertReadVL(MBB); - BlockInfo.clear(); - return HaveVectorOp; + return true; } /// Returns an instance of the Insert VSETVLI pass. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 64f9e3eb8d86f..4a9acdbb68183 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1464,6 +1464,64 @@ bool RISCVInstrInfo::isFromLoadImm(const MachineRegisterInfo &MRI, return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm); } +bool RISCVInstrInfo::isVectorCopy(const TargetRegisterInfo *TRI, + const MachineInstr &MI) { + return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() && + RISCVRegisterInfo::isRVVRegClass( + TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg())); +} + +bool RISCVInstrInfo::isMaskRegOp(const MachineInstr &MI) { + if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) + return false; + const unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + // A Log2SEW of 0 is an operation on mask registers only. + return Log2SEW == 0; +} + +bool RISCVInstrInfo::hasUndefinedPassthru(const MachineInstr &MI) { + + unsigned UseOpIdx; + if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) + // If there is no passthrough operand, then the pass through + // lanes are undefined.
+ return true; + + // All undefined passthrus should be $noreg: see + // RISCVDAGToDAGISel::doPeepholeNoRegPassThru + const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef(); +} + +std::optional<unsigned> +RISCVInstrInfo::getEEWForLoadStore(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + default: + return std::nullopt; + case RISCV::VLE8_V: + case RISCV::VLSE8_V: + case RISCV::VSE8_V: + case RISCV::VSSE8_V: + return 8; + case RISCV::VLE16_V: + case RISCV::VLSE16_V: + case RISCV::VSE16_V: + case RISCV::VSSE16_V: + return 16; + case RISCV::VLE32_V: + case RISCV::VLSE32_V: + case RISCV::VSE32_V: + case RISCV::VSSE32_V: + return 32; + case RISCV::VLE64_V: + case RISCV::VLSE64_V: + case RISCV::VSE64_V: + case RISCV::VSSE64_V: + return 64; + } +} + bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { bool IsSigned = false; bool IsEquality = false; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 785c8352d4a5e..35fa510a3d7a3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -321,6 +321,25 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { static bool isFromLoadImm(const MachineRegisterInfo &MRI, const MachineOperand &Op, int64_t &Imm); + /// Return true if \p MI is a copy that will be lowered to one or more + /// vmvNr.vs. + static bool isVectorCopy(const TargetRegisterInfo *TRI, + const MachineInstr &MI); + + /// Return true if this is an operation on mask registers. Note that + /// this includes both arithmetic/logical ops and load/store (vlm/vsm). + static bool isMaskRegOp(const MachineInstr &MI); + + /// Return true if the inactive elements in the result are entirely undefined. + /// Note that this is different from "agnostic" as defined by the vector + /// specification. Agnostic requires each lane to either be undisturbed, or + /// take the value -1; no other value is allowed. + static bool hasUndefinedPassthru(const MachineInstr &MI); + + /// Get the EEW for a load or store instruction. Return std::nullopt if MI is + /// not a load or store which ignores SEW. + static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI); + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.cpp b/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.cpp new file mode 100644 index 0000000000000..de3399769232a --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.cpp @@ -0,0 +1,946 @@ +//===- RISCVVectorConfigAnalysis ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the analysis of the RISC-V vector unit configuration (VL and VTYPE).
+//===----------------------------------------------------------------------===// + +#include "RISCVVectorConfigAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveStacks.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-vconfig-analysis" + +static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) { + auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL); + return Fractional || LMul == 1; +} + +static unsigned getVLOpNum(const MachineInstr &MI) { + return RISCVII::getVLOpNum(MI.getDesc()); +} + +static unsigned getSEWOpNum(const MachineInstr &MI) { + return RISCVII::getSEWOpNum(MI.getDesc()); +} + +/// Given a virtual register \p Reg, return the corresponding VNInfo for it. +/// This will return nullptr if the virtual register is an implicit_def or +/// if LiveIntervals is not available. +static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI, + const LiveIntervals *LIS) { + assert(Reg.isVirtual()); + if (!LIS) + return nullptr; + auto &LI = LIS->getInterval(Reg); + SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI); + return LI.getVNInfoBefore(SI); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// Implement operator<<. +void DemandedFields::print(raw_ostream &OS) const { + OS << "{"; + OS << "VLAny=" << VLAny << ", "; + OS << "VLZeroness=" << VLZeroness << ", "; + OS << "SEW="; + switch (SEW) { + case SEWEqual: + OS << "SEWEqual"; + break; + case SEWGreaterThanOrEqual: + OS << "SEWGreaterThanOrEqual"; + break; + case SEWGreaterThanOrEqualAndLessThan64: + OS << "SEWGreaterThanOrEqualAndLessThan64"; + break; + case SEWNone: + OS << "SEWNone"; + break; + }; + OS << ", "; + OS << "LMUL="; + switch (LMUL) { + case LMULEqual: + OS << "LMULEqual"; + break; + case LMULLessThanOrEqualToM1: + OS << "LMULLessThanOrEqualToM1"; + break; + case LMULNone: + OS << "LMULNone"; + break; + }; + OS << ", "; + OS << "SEWLMULRatio=" << SEWLMULRatio << ", "; + OS << "TailPolicy=" << TailPolicy << ", "; + OS << "MaskPolicy=" << MaskPolicy << ", "; + OS << "VILL=" << VILL; + OS << "}"; +} + +void VSETVLIInfo::print(raw_ostream &OS) const { + OS << "{"; + if (!isValid()) + OS << "Uninitialized"; + if (isUnknown()) + OS << "unknown"; + if (hasAVLReg()) + OS << "AVLReg=" << llvm::printReg(getAVLReg()); + if (hasAVLImm()) + OS << "AVLImm=" << (unsigned)AVLImm; + if (hasAVLVLMAX()) + OS << "AVLVLMAX"; + OS << ", "; + + unsigned LMul; + bool Fractional; + std::tie(LMul, Fractional) = decodeVLMUL(VLMul); + + OS << "VLMul="; + if (Fractional) + OS << "mf"; + else + OS << "m"; + OS << LMul << ", " + << "SEW=e" << (unsigned)SEW << ", " + << "TailAgnostic=" << (bool)TailAgnostic << ", " + << "MaskAgnostic=" << (bool)MaskAgnostic << ", " + << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}"; +} +#endif + +void DemandedFields::demandVTYPE() { + SEW = SEWEqual; + LMUL = LMULEqual; + SEWLMULRatio = true; + TailPolicy = true; + MaskPolicy = true; + VILL = true; +} + +void DemandedFields::doUnion(const DemandedFields &B) { + VLAny |= B.VLAny; + VLZeroness |= B.VLZeroness; + SEW = std::max(SEW, B.SEW); + LMUL = std::max(LMUL, B.LMUL); + SEWLMULRatio |= B.SEWLMULRatio; + TailPolicy |= B.TailPolicy; + MaskPolicy |= B.MaskPolicy; + VILL |= B.VILL; +} + +const MachineInstr *VSETVLIInfo::getAVLDefMI(const LiveIntervals *LIS) const { + assert(hasAVLReg()); + if (!LIS || getAVLVNInfo()->isPHIDef()) + return nullptr; + auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def); + assert(MI); + return MI; +} + +void VSETVLIInfo::setAVL(const VSETVLIInfo &Info) { + 
assert(Info.isValid()); + if (Info.isUnknown()) + setUnknown(); + else if (Info.hasAVLReg()) + setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); + else if (Info.hasAVLVLMAX()) + setAVLVLMAX(); + else { + assert(Info.hasAVLImm()); + setAVLImm(Info.getAVLImm()); + } +} + +bool VSETVLIInfo::hasNonZeroAVL(const LiveIntervals *LIS) const { + if (hasAVLImm()) + return getAVLImm() > 0; + if (hasAVLReg()) { + if (auto *DefMI = getAVLDefMI(LIS)) + return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI); + } + if (hasAVLVLMAX()) + return true; + return false; +} + +bool VSETVLIInfo::hasSameAVL(const VSETVLIInfo &Other) const { + // Without LiveIntervals, we don't know which instruction defines a + // register. Since a register may be redefined, this means all AVLIsReg + // states must be treated as possibly distinct. + if (hasAVLReg() && Other.hasAVLReg()) { + assert(!getAVLVNInfo() == !Other.getAVLVNInfo() && + "we either have intervals or we don't"); + if (!getAVLVNInfo()) + return false; + } + return hasSameAVLLatticeValue(Other); +} + +bool VSETVLIInfo::hasSameAVLLatticeValue(const VSETVLIInfo &Other) const { + if (hasAVLReg() && Other.hasAVLReg()) { + assert(!getAVLVNInfo() == !Other.getAVLVNInfo() && + "we either have intervals or we don't"); + if (!getAVLVNInfo()) + return getAVLReg() == Other.getAVLReg(); + return getAVLVNInfo()->id == Other.getAVLVNInfo()->id && + getAVLReg() == Other.getAVLReg(); + } + + if (hasAVLImm() && Other.hasAVLImm()) + return getAVLImm() == Other.getAVLImm(); + + if (hasAVLVLMAX()) + return Other.hasAVLVLMAX() && hasSameVLMAX(Other); + + return false; +} + +void VSETVLIInfo::setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) { + assert(isValid() && !isUnknown() && + "Can't set VTYPE for uninitialized or unknown"); + VLMul = L; + SEW = S; + TailAgnostic = TA; + MaskAgnostic = MA; +} + +void VSETVLIInfo::setVTYPE(unsigned VType) { + assert(isValid() && !isUnknown() && + "Can't set VTYPE for uninitialized or unknown"); + VLMul = RISCVVType::getVLMUL(VType); + SEW = RISCVVType::getSEW(VType); + TailAgnostic = RISCVVType::isTailAgnostic(VType); + MaskAgnostic = RISCVVType::isMaskAgnostic(VType); +} + +bool VSETVLIInfo::hasSameVTYPE(const VSETVLIInfo &Other) const { + assert(isValid() && Other.isValid() && "Can't compare invalid VSETVLIInfos"); + assert(!isUnknown() && !Other.isUnknown() && + "Can't compare VTYPE in unknown state"); + assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly && + "Can't compare when only LMUL/SEW ratio is valid."); + return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) == + std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic, + Other.MaskAgnostic); +} + +bool VSETVLIInfo::isCompatible(const DemandedFields &Used, + const VSETVLIInfo &Require, + const LiveIntervals *LIS) const { + assert(isValid() && Require.isValid() && + "Can't compare invalid VSETVLIInfos"); + // Nothing is compatible with Unknown. + if (isUnknown() || Require.isUnknown()) + return false; + + // If only our VLMAX ratio is valid, then this isn't compatible. + if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly) + return false; + + if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require))) + return false; + + if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS)) + return false; + + return hasCompatibleVTYPE(Used, Require); +} + +bool VSETVLIInfo::operator==(const VSETVLIInfo &Other) const { + // Uninitialized is only equal to another Uninitialized. 
+ if (!isValid())
+ return !Other.isValid();
+ if (!Other.isValid())
+ return !isValid();
+
+ // Unknown is only equal to another Unknown.
+ if (isUnknown())
+ return Other.isUnknown();
+ if (Other.isUnknown())
+ return isUnknown();
+
+ if (!hasSameAVLLatticeValue(Other))
+ return false;
+
+ // If the SEWLMULRatioOnly bits are different, then they aren't equal.
+ if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
+ return false;
+
+ // If only the VLMAX is valid, check that it is the same.
+ if (SEWLMULRatioOnly)
+ return hasSameVLMAX(Other);
+
+ // If the full VTYPE is valid, check that it is the same.
+ return hasSameVTYPE(Other);
+}
+
+VSETVLIInfo VSETVLIInfo::intersect(const VSETVLIInfo &Other) const {
+ // If the new value isn't valid, ignore it.
+ if (!Other.isValid())
+ return *this;
+
+ // If this value isn't valid, this must be the first predecessor, use it.
+ if (!isValid())
+ return Other;
+
+ // If either is unknown, the result is unknown.
+ if (isUnknown() || Other.isUnknown())
+ return VSETVLIInfo::getUnknown();
+
+ // If we have an exact match, return this.
+ if (*this == Other)
+ return *this;
+
+ // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
+ // return an SEW/LMUL ratio only value.
+ if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
+ VSETVLIInfo MergeInfo = *this;
+ MergeInfo.SEWLMULRatioOnly = true;
+ return MergeInfo;
+ }
+
+ // Otherwise the result is unknown.
+ return VSETVLIInfo::getUnknown();
+}
+
+bool RISCVVectorConfigInfo::areCompatibleVTYPEs(uint64_t CurVType,
+ uint64_t NewVType,
+ const DemandedFields &Used) {
+ switch (Used.SEW) {
+ case DemandedFields::SEWNone:
+ break;
+ case DemandedFields::SEWEqual:
+ if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqual:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
+ return false;
+ break;
+ case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
+ if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
+ RISCVVType::getSEW(NewVType) >= 64)
+ return false;
+ break;
+ }
+
+ switch (Used.LMUL) {
+ case DemandedFields::LMULNone:
+ break;
+ case DemandedFields::LMULEqual:
+ if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
+ return false;
+ break;
+ case DemandedFields::LMULLessThanOrEqualToM1:
+ if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
+ return false;
+ break;
+ }
+
+ if (Used.SEWLMULRatio) {
+ auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
+ RISCVVType::getVLMUL(CurVType));
+ auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
+ RISCVVType::getVLMUL(NewVType));
+ if (Ratio1 != Ratio2)
+ return false;
+ }
+
+ if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
+ RISCVVType::isTailAgnostic(NewVType))
+ return false;
+ if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
+ RISCVVType::isMaskAgnostic(NewVType))
+ return false;
+ return true;
+}
+
+bool VSETVLIInfo::hasCompatibleVTYPE(const DemandedFields &Used,
+ const VSETVLIInfo &Require) const {
+ return RISCVVectorConfigInfo::areCompatibleVTYPEs(Require.encodeVTYPE(),
+ encodeVTYPE(), Used);
+}
+
+bool RISCVVectorConfigInfo::haveVectorOp() { return HaveVectorOp; }
+
+/// Return the fields and properties demanded by the provided instruction.
+DemandedFields RISCVVectorConfigInfo::getDemanded(const MachineInstr &MI,
+ const RISCVSubtarget *ST) {
+ // This function works in coalesceVSETVLI too.
We can still use the value of a
+ // SEW, VL, or Policy operand even though it might not be the exact value in
+ // the VL or VTYPE, since we only care about what the instruction originally
+ // demanded.
+
+ // Most instructions don't use any of these subfields.
+ DemandedFields Res;
+ // Start conservative if registers are used
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
+ Res.demandVL();
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
+ Res.demandVTYPE();
+ // Start conservative on the unlowered form too
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVII::hasSEWOp(TSFlags)) {
+ Res.demandVTYPE();
+ if (RISCVII::hasVLOp(TSFlags))
+ if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ !VLOp.isReg() || !VLOp.isUndef())
+ Res.demandVL();
+
+ // Behavior is independent of mask policy.
+ if (!RISCVII::usesMaskPolicy(TSFlags))
+ Res.MaskPolicy = false;
+ }
+
+ // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
+ // They instead demand the ratio of the two which is used in computing
+ // EMUL, but which allows us the flexibility to change SEW and LMUL
+ // provided we don't change the ratio.
+ // Note: We assume that the instruction's initial SEW is the EEW encoded
+ // in the opcode. This is asserted when constructing the VSETVLIInfo.
+ if (RISCVInstrInfo::getEEWForLoadStore(MI)) {
+ Res.SEW = DemandedFields::SEWNone;
+ Res.LMUL = DemandedFields::LMULNone;
+ }
+
+ // Store instructions don't use the policy fields.
+ if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+ // TODO: Possible extensions to this logic
+ // * Probably ok if available VLMax is larger than demanded
+ // * The policy bits can probably be ignored.
+ if (RISCVInstrInfo::isMaskRegOp(MI)) {
+ Res.SEW = DemandedFields::SEWNone;
+ Res.LMUL = DemandedFields::LMULNone;
+ }
+
+ // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and VL > 0.
+ if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEWLMULRatio = false;
+ Res.VLAny = false;
+ // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
+ // need to preserve any other bits and are thus compatible with any larger
+ // etype, and can disregard policy bits. Warning: It's tempting to try doing
+ // this for any tail agnostic operation, but we can't as TA requires
+ // tail lanes to either be the original value or -1. We are writing
+ // unknown bits to the lanes here.
+ if (RISCVInstrInfo::hasUndefinedPassthru(MI)) {
+ if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
+ !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Res.TailPolicy = false;
+ }
+ }
+
+ // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
+ if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
+ const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ // A slidedown/slideup with an *undefined* passthru can freely clobber
+ // elements not copied from the source vector (e.g. masked off, tail, or
+ // slideup's prefix).
Notes:
+ // * We can't modify SEW here since the slide amount is in units of SEW.
+ // * VL=1 is special only because we have existing support for zero vs
+ // non-zero VL. We could generalize this if we had a VL > C predicate.
+ // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
+ // * As above, this is only legal for tail "undefined" not "agnostic".
+ // * We avoid increasing vl if the subtarget has +vl-dependent-latency
+ if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
+ VLOp.getImm() == 1 && RISCVInstrInfo::hasUndefinedPassthru(MI) &&
+ !ST->hasVLDependentLatency()) {
+ Res.VLAny = false;
+ Res.VLZeroness = true;
+ Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+ Res.TailPolicy = false;
+ }
+
+ // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
+ // semantically the same as vmv.s.x. This is particularly useful since we
+ // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
+ // in its place. Since a splat is non-constant time in LMUL, we do need to
+ // be careful not to increase the number of active vector registers (unlike
+ // vmv.s.x).
+ if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
+ VLOp.getImm() == 1 && RISCVInstrInfo::hasUndefinedPassthru(MI) &&
+ !ST->hasVLDependentLatency()) {
+ Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+ Res.SEWLMULRatio = false;
+ Res.VLAny = false;
+ if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
+ !ST->hasVInstructionsF64())
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+ else
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Res.TailPolicy = false;
+ }
+ }
+
+ // In §32.16.6, whole vector register moves have a dependency on SEW. At the
+ // MIR level, though, we don't encode the element type, and it gives the same
+ // result whatever the SEW may be.
+ //
+ // However, it does need a valid SEW, i.e. vill must be cleared. The entry to
+ // a function, calls, and inline assembly may all set it, so make sure we
+ // clear it for whole register copies. Do this by leaving VILL demanded.
+ if (RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI)) {
+ Res.LMUL = DemandedFields::LMULNone;
+ Res.SEW = DemandedFields::SEWNone;
+ Res.SEWLMULRatio = false;
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (RISCVInstrInfo::isVExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ // TODO: LMUL can be any larger value (without cost)
+ Res.TailPolicy = false;
+ }
+
+ return Res;
+}
+
+// Given an incoming state reaching MI, minimally modifies that state so that
+// it is compatible with MI. The resulting state is guaranteed to be
+// semantically legal for MI, but may not be the state requested by MI.
+void RISCVVectorConfigInfo::transferBefore(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
+ if (RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI) &&
+ (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
+ // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
+ // be coalesced into another vsetvli since we won't demand any fields.
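+ // The particular choice below (AVL=1, SEW=8, LMUL=1, tail/mask agnostic)
+ // is arbitrary: any legal configuration works, since getDemanded requests
+ // only VILL for whole register copies.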
+ VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
+ NewInfo.setAVLImm(1);
+ NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+ Info = NewInfo;
+ return;
+ }
+
+ if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
+ return;
+
+ DemandedFields Demanded = getDemanded(MI, ST);
+
+ const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
+ assert(NewInfo.isValid() && !NewInfo.isUnknown());
+ if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
+ return;
+
+ const VSETVLIInfo PrevInfo = Info;
+ if (!Info.isValid() || Info.isUnknown())
+ Info = NewInfo;
+
+ const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
+
+ // If MI only demands that VL has the same zeroness, we only need to set the
+ // AVL if the zeroness differs. This removes a vsetvli entirely if the types
+ // match, or allows use of the cheaper AVL-preserving variant if VLMAX
+ // doesn't change. If VLMAX might change, we couldn't use the
+ // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
+ // extending the live range of an AVL register operand.
+ // TODO: We can probably relax this for immediates.
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
+ IncomingInfo.hasSameVLMAX(PrevInfo);
+ if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
+ Info.setAVL(IncomingInfo);
+
+ Info.setVTYPE(
+ ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
+ .getVLMUL(),
+ ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
+ // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+ // if needed.
+ (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
+ IncomingInfo.getTailAgnostic(),
+ (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
+ IncomingInfo.getMaskAgnostic());
+
+ // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
+ // the AVL.
+ if (Info.hasSEWLMULRatioOnly()) {
+ VSETVLIInfo RatiolessInfo = IncomingInfo;
+ RatiolessInfo.setAVL(Info);
+ Info = RatiolessInfo;
+ }
+}
+
+// Given a state with which we evaluated MI (see transferBefore above for why
+// this might be different than the state MI requested), modify the state to
+// reflect the changes MI might make.
+void RISCVVectorConfigInfo::transferAfter(VSETVLIInfo &Info,
+ const MachineInstr &MI) const {
+ if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
+ Info = getInfoForVSETVLI(MI);
+ return;
+ }
+
+ if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
+ // Update the AVL to the vl-output of the fault-only-first load.
+ assert(MI.getOperand(1).getReg().isVirtual());
+ if (LIS) {
+ auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ SlotIndex SI =
+ LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(SI);
+ Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
+ } else
+ Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
+ return;
+ }
+
+ // If this is something that updates VL/VTYPE that we don't know about, set
+ // the state to unknown.
+ if (MI.isCall() || MI.isInlineAsm() ||
+ MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
+ MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
+ Info = VSETVLIInfo::getUnknown();
+}
+
+unsigned RISCVVectorConfigInfo::computeVLMAX(unsigned VLEN, unsigned SEW,
+ RISCVVType::VLMUL VLMul) {
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
+ if (Fractional)
+ VLEN = VLEN / LMul;
+ else
+ VLEN = VLEN * LMul;
+ return VLEN / SEW;
+}
+
+// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
+// places.
+VSETVLIInfo RISCVVectorConfigInfo::adjustIncoming(const VSETVLIInfo &PrevInfo,
+ const VSETVLIInfo &NewInfo,
+ DemandedFields &Demanded) {
+ VSETVLIInfo Info = NewInfo;
+
+ if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
+ !PrevInfo.isUnknown()) {
+ if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
+ PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
+ Info.setVLMul(*NewVLMul);
+ Demanded.LMUL = DemandedFields::LMULEqual;
+ }
+
+ return Info;
+}
+
+bool RISCVVectorConfigInfo::needVSETVLI(const DemandedFields &Used,
+ const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const {
+ if (!CurInfo.isValid() || CurInfo.isUnknown() ||
+ CurInfo.hasSEWLMULRatioOnly())
+ return true;
+
+ if (CurInfo.isCompatible(Used, Require, LIS))
+ return false;
+
+ return true;
+}
+
+VSETVLIInfo
+RISCVVectorConfigInfo::getInfoForVSETVLI(const MachineInstr &MI) const {
+ VSETVLIInfo NewInfo;
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
+ if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
+ NewInfo.setAVLVLMAX();
+ else if (MI.getOperand(1).isUndef())
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ NewInfo.setAVLImm(1);
+ else {
+ Register AVLReg = MI.getOperand(1).getReg();
+ VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
+ NewInfo.setAVLRegDef(VNI, AVLReg);
+ }
+ }
+ NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+ forwardVSETVLIAVL(NewInfo);
+
+ return NewInfo;
+}
+
+bool RISCVVectorConfigInfo::canMutatePriorConfig(
+ const MachineInstr &PrevMI, const MachineInstr &MI,
+ const DemandedFields &Used) const {
+ // If the VL values aren't equal, return false if either a) the former is
+ // demanded, or b) we can't rewrite the former to be the latter for
+ // implementation reasons.
+ if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
+ if (Used.VLAny)
+ return false;
+
+ if (Used.VLZeroness) {
+ if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
+ return false;
+ if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
+ LIS))
+ return false;
+ }
+
+ auto &AVL = MI.getOperand(1);
+
+ // If the AVL is a register, we need to make sure its definition is the
+ // same at PrevMI as it was at MI.
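+ // E.g. if the register were redefined between PrevMI and MI (the MIR at
+ // this point need not have a single definition per virtual register), the
+ // two lookups below would return different VNInfos and we conservatively
+ // refuse the rewrite.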
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
+ VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
+ VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
+ if (!VNI || !PrevVNI || VNI != PrevVNI)
+ return false;
+ }
+ }
+
+ assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
+ auto PriorVType = PrevMI.getOperand(2).getImm();
+ auto VType = MI.getOperand(2).getImm();
+ return areCompatibleVTYPEs(PriorVType, VType, Used);
+}
+
+VSETVLIInfo
+RISCVVectorConfigInfo::computeInfoForInstr(const MachineInstr &MI) const {
+ VSETVLIInfo InstrInfo;
+ const uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ bool TailAgnostic = true;
+ bool MaskAgnostic = true;
+ if (!RISCVInstrInfo::hasUndefinedPassthru(MI)) {
+ // Start with undisturbed.
+ TailAgnostic = false;
+ MaskAgnostic = false;
+
+ // If there is a policy operand, use it.
+ if (RISCVII::hasVecPolicyOp(TSFlags)) {
+ const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ uint64_t Policy = Op.getImm();
+ assert(Policy <=
+ (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
+ "Invalid Policy Value");
+ TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
+ MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
+ }
+
+ if (!RISCVII::usesMaskPolicy(TSFlags))
+ MaskAgnostic = true;
+ }
+
+ RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+ unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ if (RISCVII::hasVLOp(TSFlags)) {
+ const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ if (VLOp.isImm()) {
+ int64_t Imm = VLOp.getImm();
+ // Convert the VLMax sentinel to the X0 register.
+ if (Imm == RISCV::VLMaxSentinel) {
+ // If we know the exact VLEN, see if we can use the constant encoding
+ // for the VLMAX instead. This reduces register pressure slightly.
+ const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
+ if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
+ InstrInfo.setAVLImm(VLMAX);
+ else
+ InstrInfo.setAVLVLMAX();
+ } else
+ InstrInfo.setAVLImm(Imm);
+ } else if (VLOp.isUndef()) {
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ InstrInfo.setAVLImm(1);
+ } else {
+ VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
+ InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
+ }
+ } else {
+ assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
+ RISCVInstrInfo::isVExtractInstr(MI));
+ // Pick an arbitrary value for state tracking purposes; it will be ignored
+ // via the demanded fields mechanism.
+ InstrInfo.setAVLImm(1);
+ }
+#ifndef NDEBUG
+ if (std::optional<unsigned> EEW = RISCVInstrInfo::getEEWForLoadStore(MI)) {
+ assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
+ }
+#endif
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+
+ forwardVSETVLIAVL(InstrInfo);
+
+ return InstrInfo;
+}
+
+bool RISCVVectorConfigInfo::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const {
+ bool HadVectorOp = false;
+
+ Info = BlockInfo[MBB.getNumber()].Pred;
+ for (const MachineInstr &MI : MBB) {
+ transferBefore(Info, MI);
+
+ if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
+ RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isVectorCopy(ST->getRegisterInfo(), MI))
+ HadVectorOp = true;
+
+ transferAfter(Info, MI);
+ }
+
+ return HadVectorOp;
+}
+
+void RISCVVectorConfigInfo::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
+ if (!Info.hasAVLReg())
+ return;
+ const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
+ if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
+ return;
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
+ if (!DefInstrInfo.hasSameVLMAX(Info))
+ return;
+ Info.setAVL(DefInstrInfo);
+}
+
+void RISCVVectorConfigInfo::computeIncomingVLVTYPE(
+ const MachineBasicBlock &MBB) {
+
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ BBInfo.InQueue = false;
+
+ // Start with the previous entry so that we keep the most conservative state
+ // we have ever found.
+ VSETVLIInfo InInfo = BBInfo.Pred;
+ if (MBB.pred_empty()) {
+ // There are no predecessors, so use the default starting status.
+ InInfo.setUnknown();
+ } else {
+ for (MachineBasicBlock *P : MBB.predecessors())
+ InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
+ }
+
+ // If we don't have any valid predecessor value, wait until we do.
+ if (!InInfo.isValid())
+ return;
+
+ // If no change, no need to rerun the block.
+ if (InInfo == BBInfo.Pred)
+ return;
+
+ BBInfo.Pred = InInfo;
+ LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Pred << "\n");
+
+ // Note: It's tempting to cache the state changes here, but due to the
+ // compatibility checks performed, a block's output state can change based
+ // on the input state. To cache, we'd have to add logic for finding
+ // never-compatible state changes.
+ VSETVLIInfo TmpStatus;
+ computeVLVTYPEChanges(MBB, TmpStatus);
+
+ // If the new exit value matches the old exit value, we don't need to revisit
+ // any blocks.
+ if (BBInfo.Exit == TmpStatus)
+ return;
+
+ BBInfo.Exit = TmpStatus;
+ LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Exit << "\n");
+
+ // Add the successors to the work list so we can propagate the changed exit
+ // status.
+ for (MachineBasicBlock *S : MBB.successors())
+ if (!BlockInfo[S->getNumber()].InQueue) {
+ BlockInfo[S->getNumber()].InQueue = true;
+ WorkList.push(S);
+ }
+}
+
+void RISCVVectorConfigInfo::compute(const MachineFunction &MF) {
+ assert(BlockInfo.empty() && "Expect empty block infos");
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ HaveVectorOp = false;
+
+ // Phase 1 - determine how VL/VTYPE are affected by each block.
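+ // (Each block is summarized in isolation here; the dataflow fixed point
+ // across the CFG is only reached during Phase 2 below.)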
+ for (const MachineBasicBlock &MBB : MF) {
+ VSETVLIInfo TmpStatus;
+ HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
+ // Initial exit state is whatever change we found in the block.
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+ BBInfo.Exit = TmpStatus;
+ LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
+ << " is " << BBInfo.Exit << "\n");
+ }
+
+ // If we didn't find any instructions that need VSETVLI, we're done.
+ if (!HaveVectorOp) {
+ BlockInfo.clear();
+ return;
+ }
+
+ // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+ // blocks to the list here, but will also add any that need to be revisited
+ // during Phase 2 processing.
+ for (const MachineBasicBlock &MBB : MF) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeIncomingVLVTYPE(MBB);
+ }
+}
+
+void RISCVVectorConfigInfo::clear() { BlockInfo.clear(); }
+
+char RISCVVectorConfigWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RISCVVectorConfigWrapperPass, DEBUG_TYPE,
+ "RISC-V Vector Config Analysis", false, true)
+INITIALIZE_PASS_END(RISCVVectorConfigWrapperPass, DEBUG_TYPE,
+ "RISC-V Vector Config Analysis", false, true)
+
+RISCVVectorConfigWrapperPass::RISCVVectorConfigWrapperPass()
+ : MachineFunctionPass(ID) {}
+
+void RISCVVectorConfigWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RISCVVectorConfigWrapperPass::runOnMachineFunction(MachineFunction &MF) {
+ auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+ LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+ Result = RISCVVectorConfigInfo(&MF.getSubtarget<RISCVSubtarget>(), LIS);
+ Result.compute(MF);
+ return false;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.h b/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.h
new file mode 100644
index 0000000000000..46825dfe7d069
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVVectorConfigAnalysis.h
@@ -0,0 +1,384 @@
+//===- RISCVVectorConfigAnalysis ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the RISC-V analysis of the vector unit configuration (VL/VTYPE).
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H
+#define LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include <queue>
+#include <vector>
+using namespace llvm;
+
+namespace llvm {
+/// Which subfields of VL or VTYPE have values we need to preserve?
+struct DemandedFields {
+ // Some unknown property of VL is used. If demanded, must preserve entire
+ // value.
+ bool VLAny = false;
+ // Only zero vs non-zero is used. If demanded, can change non-zero values.
+ bool VLZeroness = false;
+ // What properties of SEW we need to preserve.
+ enum : uint8_t {
+ SEWEqual = 3, // The exact value of SEW needs to be preserved.
+ SEWGreaterThanOrEqualAndLessThan64 = + 2, // SEW can be changed as long as it's greater + // than or equal to the original value, but must be less + // than 64. + SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater + // than or equal to the original value. + SEWNone = 0 // We don't need to preserve SEW at all. + } SEW = SEWNone; + enum : uint8_t { + LMULEqual = 2, // The exact value of LMUL needs to be preserved. + LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1. + LMULNone = 0 // We don't need to preserve LMUL at all. + } LMUL = LMULNone; + bool SEWLMULRatio = false; + bool TailPolicy = false; + bool MaskPolicy = false; + // If this is true, we demand that VTYPE is set to some legal state, i.e. that + // vill is unset. + bool VILL = false; + + // Return true if any part of VTYPE was used + bool usedVTYPE() const { + return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL; + } + + // Return true if any property of VL was used + bool usedVL() { return VLAny || VLZeroness; } + + // Mark all VTYPE subfields and properties as demanded + void demandVTYPE(); + + // Mark all VL properties as demanded + void demandVL() { + VLAny = true; + VLZeroness = true; + } + + static DemandedFields all() { + DemandedFields DF; + DF.demandVTYPE(); + DF.demandVL(); + return DF; + } + + // Make this the result of demanding both the fields in this and B. + void doUnion(const DemandedFields &B); + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Support for debugging, callable in GDB: V->dump() + LLVM_DUMP_METHOD void dump() const { + print(dbgs()); + dbgs() << "\n"; + } + + void print(raw_ostream &OS) const; +#endif +}; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_ATTRIBUTE_USED +inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) { + DF.print(OS); + return OS; +} +#endif + +/// Defines the abstract state with which the forward dataflow models the +/// values of the VL and VTYPE registers after insertion. +class VSETVLIInfo { + struct AVLDef { + // Every AVLDef should have a VNInfo, unless we're running without + // LiveIntervals in which case this will be nullptr. + const VNInfo *ValNo; + Register DefReg; + }; + union { + AVLDef AVLRegDef; + unsigned AVLImm; + }; + + enum : uint8_t { + Uninitialized, + AVLIsReg, + AVLIsImm, + AVLIsVLMAX, + Unknown, // AVL and VTYPE are fully unknown + } State = Uninitialized; + + // Fields from VTYPE. 
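+ // (VLMul, SEW, TailAgnostic and MaskAgnostic mirror the vlmul/vsew/vta/vma
+ // fields of the vtype CSR; SEWLMULRatioOnly is analysis-internal lattice
+ // state rather than a CSR field.)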
+ RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1; + uint8_t SEW = 0; + uint8_t TailAgnostic : 1; + uint8_t MaskAgnostic : 1; + uint8_t SEWLMULRatioOnly : 1; + +public: + VSETVLIInfo() + : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), + SEWLMULRatioOnly(false) {} + + static VSETVLIInfo getUnknown() { + VSETVLIInfo Info; + Info.setUnknown(); + return Info; + } + + bool isValid() const { return State != Uninitialized; } + void setUnknown() { State = Unknown; } + bool isUnknown() const { return State == Unknown; } + + void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) { + assert(AVLReg.isVirtual()); + AVLRegDef.ValNo = VNInfo; + AVLRegDef.DefReg = AVLReg; + State = AVLIsReg; + } + + void setAVLImm(unsigned Imm) { + AVLImm = Imm; + State = AVLIsImm; + } + + void setAVLVLMAX() { State = AVLIsVLMAX; } + + bool hasAVLImm() const { return State == AVLIsImm; } + bool hasAVLReg() const { return State == AVLIsReg; } + bool hasAVLVLMAX() const { return State == AVLIsVLMAX; } + Register getAVLReg() const { + assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); + return AVLRegDef.DefReg; + } + unsigned getAVLImm() const { + assert(hasAVLImm()); + return AVLImm; + } + const VNInfo *getAVLVNInfo() const { + assert(hasAVLReg()); + return AVLRegDef.ValNo; + } + // Most AVLIsReg infos will have a single defining MachineInstr, unless it was + // a PHI node. In that case getAVLVNInfo()->def will point to the block + // boundary slot and this will return nullptr. If LiveIntervals isn't + // available, nullptr is also returned. + const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const; + + void setAVL(const VSETVLIInfo &Info); + + unsigned getSEW() const { return SEW; } + RISCVVType::VLMUL getVLMUL() const { return VLMul; } + bool getTailAgnostic() const { return TailAgnostic; } + bool getMaskAgnostic() const { return MaskAgnostic; } + + bool hasNonZeroAVL(const LiveIntervals *LIS) const; + + bool hasEquallyZeroAVL(const VSETVLIInfo &Other, + const LiveIntervals *LIS) const { + if (hasSameAVL(Other)) + return true; + return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS)); + } + + bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const; + // Return true if the two lattice values are guaranteed to have + // the same AVL value at runtime. + bool hasSameAVL(const VSETVLIInfo &Other) const; + + void setVTYPE(unsigned VType); + + void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA); + + void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; } + + unsigned encodeVTYPE() const { + assert(isValid() && !isUnknown() && !SEWLMULRatioOnly && + "Can't encode VTYPE for uninitialized or unknown"); + return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic); + } + + bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; } + + bool hasSameVTYPE(const VSETVLIInfo &Other) const; + + unsigned getSEWLMULRatio() const { + assert(isValid() && !isUnknown() && + "Can't use VTYPE for uninitialized or unknown"); + return RISCVVType::getSEWLMULRatio(SEW, VLMul); + } + + // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX. + // Note that having the same VLMAX ensures that both share the same + // function from AVL to VL; that is, they must produce the same VL value + // for any given AVL value. 
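+ // For example, (SEW=8, LMUL=mf8) and (SEW=64, LMUL=m1) share the SEW/LMUL
+ // ratio 64, so with VLEN=512 both give VLMAX=8.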
+ bool hasSameVLMAX(const VSETVLIInfo &Other) const {
+ assert(isValid() && Other.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!isUnknown() && !Other.isUnknown() &&
+ "Can't compare VTYPE in unknown state");
+ return getSEWLMULRatio() == Other.getSEWLMULRatio();
+ }
+
+ bool hasCompatibleVTYPE(const DemandedFields &Used,
+ const VSETVLIInfo &Require) const;
+ // Determine whether the vector instruction's requirements represented by
+ // Require are compatible with the previous vsetvli instruction represented
+ // by this. MI is the instruction whose requirements we're considering.
+ bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
+ const LiveIntervals *LIS) const;
+
+ bool operator==(const VSETVLIInfo &Other) const;
+
+ bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }
+
+ // Calculate the VSETVLIInfo visible to a block assuming this and Other are
+ // both predecessors.
+ VSETVLIInfo intersect(const VSETVLIInfo &Other) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+
+ void print(raw_ostream &OS) const;
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
+ V.print(OS);
+ return OS;
+}
+#endif
+
+struct BlockData {
+ // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
+ // block. Calculated in Phase 2.
+ VSETVLIInfo Exit;
+
+ // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
+ // blocks. Calculated in Phase 2, and used by Phase 3.
+ VSETVLIInfo Pred;
+
+ // Keeps track of whether the block is already in the queue.
+ bool InQueue = false;
+
+ BlockData() = default;
+};
+
+class RISCVVectorConfigInfo {
+ bool HaveVectorOp = false;
+ const RISCVSubtarget *ST;
+ // Possibly null!
+ LiveIntervals *LIS;
+ std::queue<const MachineBasicBlock *> WorkList;
+ std::vector<BlockData> BlockInfo;
+
+public:
+ /// Return the fields and properties demanded by the provided instruction.
+ static DemandedFields getDemanded(const MachineInstr &MI,
+ const RISCVSubtarget *ST);
+
+ /// Return true if moving from CurVType to NewVType is
+ /// indistinguishable from the perspective of an instruction (or set
+ /// of instructions) which use only the Used subfields and properties.
+ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
+ const DemandedFields &Used);
+ // Return a VSETVLIInfo representing the changes made by this VSETVLI or
+ // VSETIVLI instruction.
+ VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
+
+ // Return true if we can mutate PrevMI to match MI without changing any of
+ // the fields which would be observed.
+ bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
+ const DemandedFields &Used) const;
+ RISCVVectorConfigInfo() {}
+ RISCVVectorConfigInfo(const RISCVSubtarget *ST, LiveIntervals *LIS)
+ : ST(ST), LIS(LIS) {}
+ const std::vector<BlockData> &getInfo() const { return BlockInfo; }
+ std::vector<BlockData> &getInfo() { return BlockInfo; }
+ bool haveVectorOp();
+ void compute(const MachineFunction &MF);
+ void clear();
+ // Given an incoming state reaching MI, minimally modifies that state so that
+ // it is compatible with MI. The resulting state is guaranteed to be
+ // semantically legal for MI, but may not be the state requested by MI.
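+ // For example, a whole-register copy only demands that vill be clear, so a
+ // valid incoming state is kept as-is instead of being replaced by the
+ // arbitrary configuration the copy would otherwise request.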
+ void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
+ // Given a state with which we evaluated MI (see transferBefore above for why
+ // this might be different than the state MI requested), modify the state to
+ // reflect the changes MI might make.
+ void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
+
+private:
+ static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
+ RISCVVType::VLMUL VLMul);
+ // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
+ // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in
+ // more places.
+ static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
+ const VSETVLIInfo &NewInfo,
+ DemandedFields &Demanded);
+ /// Return true if a VSETVLI is required to transition from CurInfo to
+ /// Require given a set of DemandedFields \p Used.
+ bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const;
+ void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
+ VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
+ bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
+ VSETVLIInfo &Info) const;
+ // If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
+ // replace the AVL operand with the AVL of the defining vsetvli. E.g.
+ //
+ // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ // $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
+ // ->
+ // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ // $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
+ void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+};
+
+class RISCVVectorConfigAnalysis
+ : public AnalysisInfoMixin<RISCVVectorConfigAnalysis> {
+ friend AnalysisInfoMixin<RISCVVectorConfigAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = RISCVVectorConfigInfo;
+ Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+class RISCVVectorConfigWrapperPass : public MachineFunctionPass {
+ RISCVVectorConfigInfo Result;
+
+public:
+ static char ID;
+
+ RISCVVectorConfigWrapperPass();
+
+ void getAnalysisUsage(AnalysisUsage &) const override;
+ bool runOnMachineFunction(MachineFunction &) override;
+ void releaseMemory() override { Result.clear(); }
+ RISCVVectorConfigInfo &getResult() { return Result; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVVCONFIGANALYSIS_H
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 8714b286374a5..bb1160f306616 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -48,6 +48,7 @@
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
 ; CHECK-NEXT: Two-Address instruction pass
 ; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: RISC-V Vector Config Analysis
 ; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index c7f70a9d266c2..db477d365f21d 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -152,6 +152,7 @@
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Greedy Register Allocator
 ; CHECK-NEXT: Virtual Register Rewriter
+; CHECK-NEXT: RISC-V Vector Config Analysis
 ; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: RISC-V Dead register definitions
 ; CHECK-NEXT: Virtual Register Map