From b49e586335d97f6ae15904af8c4c8be4e66a2b77 Mon Sep 17 00:00:00 2001
From: Krishnam Tibrewala
Date: Thu, 1 May 2025 17:17:33 -0700
Subject: [PATCH] [AIE2p] Use multi-slot pseudo for const COPY with unique def

---
Notes (ignored by git am, not part of the commit message):
- copyPhysReg: when the source register has a unique defining instruction
  that is a move-immediate with a plain immediate in operand 1, the COPY is
  emitted as a constant move (opcode chosen by getConstantMovOpcode) of that
  same immediate into the destination; otherwise the scalar register move is
  emitted as before.
- The isImm() check is required because getImm() asserts on any other operand
  kind. A move-immediate may carry a non-immediate operand; presumably
  `movxm ls, #.LBB0_1` in gelu-templated.ll is such a case (TODO confirm).
- getConstantMovOpcode: virtual registers still resolve their class through
  the register class / register bank; any other register now uses
  TRI->getMinimalPhysRegClass(Reg).
- NOTE(review): the source register in copyPhysReg looks like a physical
  register, so "unique def" only counts explicit defs of that exact register.
  Confirm that aliasing defs, live-ins and call clobbers cannot reach the COPY.
- Line breaks, indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; use `git apply --ignore-whitespace`
  if context fails to match. The blob hashes on the `index` line of
  AIE2PInstrInfo.cpp predate the isImm() check.

 llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp  | 31 +++++++++++++------
 .../aie2p/end-to-end/conv2d_bfp16_convert.ll  |  8 ++---
 .../AIE/aie2p/end-to-end/gelu-templated.ll    |  6 ++--
 .../AIE/aie2p/postrapseudos/pseudomove.mir    | 12 +++++++
 4 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
index 49d892737bf3..bca83d7be800 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
@@ -557,10 +557,19 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

   if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) &&
       AIE2P::mMvSclDstRegClass.contains(DstReg)) {
-    // Build MultiSlotPseudo in preference
-    const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
-    BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
-        .addReg(SrcReg, getKillRegState(KillSrc));
+    if (MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
+        MI && MI->isMoveImmediate() && MI->getOperand(1).isImm()) {
+      // Try modifying scalar move to pseudo immediate move.
+      const int64_t Imm = MI->getOperand(1).getImm();
+      APInt ImmVal = APInt(64, Imm);
+      auto OpCode = getConstantMovOpcode(MRI, DstReg, ImmVal);
+      BuildMI(MBB, MBBI, DL, get(OpCode), DstReg).addImm(Imm);
+    } else {
+      // Build MultiSlotPseudo in preference
+      const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
+      BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+    }
   } else if ((AIE2P::eLRegClass.contains(SrcReg)) &&
              (AIE2P::eLRegClass.contains(DstReg))) {
     BuildMI(MBB, MBBI, DL, get(AIE2P::MOV_alu_mv_mv_mv_scl),
@@ -1138,11 +1147,15 @@ unsigned AIE2PInstrInfo::getConstantMovOpcode(MachineRegisterInfo &MRI,
   unsigned int ImmSize = Val.getSignificantBits();

   const TargetRegisterClass *DstRegClass = nullptr;
-  const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
-  if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
-    DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
-  if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
-    DstRegClass = TRC;
+  if (Register::isVirtualRegister(Reg)) {
+    const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
+    if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
+      DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
+    if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
+      DstRegClass = TRC;
+  } else {
+    DstRegClass = TRI->getMinimalPhysRegClass(Reg);
+  }
   assert(DstRegClass != nullptr && "RC cannot be null");
   if (ImmSize <= 11) {
     if (regClassMatches(AIE2P::mAluCgRegClass, DstRegClass, Reg))
diff --git a/llvm/test/CodeGen/AIE/aie2p/end-to-end/conv2d_bfp16_convert.ll b/llvm/test/CodeGen/AIE/aie2p/end-to-end/conv2d_bfp16_convert.ll
index c749e9ab9ceb..9bc72bd57f71 100644
--- a/llvm/test/CodeGen/AIE/aie2p/end-to-end/conv2d_bfp16_convert.ll
+++ b/llvm/test/CodeGen/AIE/aie2p/end-to-end/conv2d_bfp16_convert.ll
@@ -23,9 +23,9 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    movx r24, #0
-; CHECK-NEXT:    mova dj0, #0; mov r26, r24
-; CHECK-NEXT:    vldb.fill.512 [p0, lf0, r24]; mov dj1, dj0
-; CHECK-NEXT:    movs dc1, dj0; vldb.pop.512 x0, [p0, lf0, r24]; mov dn1, dn0
+; CHECK-NEXT:    mov r26, r24
+; CHECK-NEXT:    mova dj0, #0; vldb.fill.512 [p0, lf0, r24]
+; CHECK-NEXT:    mova dc1, #0; vldb.pop.512 x0, [p0, lf0, r24]; movs dj1, dj0; mov dn1, dn0
 ; CHECK-NEXT:    vldb.pop.512.2d x2, [p0, lf0, r24, d1]
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    vldb.fill.512 [p0, lf0, r24]
@@ -35,7 +35,7 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
 ; CHECK-NEXT:    nopa ; vldb.fill.512 [p0, lf0, r24]; nops ; nopxm ; nopv
 ; CHECK-NEXT:    nopa ; vldb.pop.512 x0, [p0, lf0, r24]; nops ; nopx ; vconv.fp32.bf16 cml0, x0; nopv
 ; CHECK-NEXT:    nopa ; vldb.pop.512.2d x2, [p0, lf0, r24, d1]; nops ; nopx ; vconv.fp32.bf16 cmh0, x2; nopv
-; CHECK-NEXT:    nopa ; nopb ; movs dc0, dj0; nopx ; mov p2, p1; nopv
+; CHECK-NEXT:    mova dc0, #0; nopb ; nops ; nopx ; mov p2, p1; nopv
 ; CHECK-NEXT:    // implicit-def: $sf
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: // %for.body
diff --git a/llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll b/llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll
index 22e201c59a13..481548d61a58 100644
--- a/llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll
+++ b/llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll
@@ -52,10 +52,10 @@ define void @gelu_fn(ptr noalias %ifm, ptr noalias %ofm, ptr nonnull align 64 de
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    vconv.bf16.fp32 x5, cml1
 ; CHECK-NEXT:    vconv.bf16.fp32 x8, cml4; movxm ls, #.LBB0_1; vmul.f dm4, x10, x4, r2
-; CHECK-NEXT:    mova r3, #0; nopb ; vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
-; CHECK-NEXT:    vconv.bf16.fp32 x5, cml3; mov s0, r3; vadd.f dm2, dm1, dm2, r0
+; CHECK-NEXT:    vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
+; CHECK-NEXT:    vconv.bf16.fp32 x5, cml3; mov s0, #0; vadd.f dm2, dm1, dm2, r0
 ; CHECK-NEXT:    vmov cml2, cml0; vmul.f dm3, x7, x2, r2
-; CHECK-NEXT:    vlda.conv.fp32.bf16 cml1, [p0], #64; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; vmul.f dm4, x5, x4, r2
+; CHECK-NEXT:    vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; mov r3, #0; vmul.f dm4, x5, x4, r2
 ; CHECK-NEXT:    vfloor.s32.bf16 x3, wh8, s0; lshl r4, r1, r4; vbcst.16 x6, r3
 ; CHECK-NEXT:    mova r1, #2; vconv.bf16.fp32 x10, cml4; add.nc lc, r4, #-7
 ; CHECK-NEXT:    nopa ; nopb ; nops ; nopx ; vshuffle x1, x1, x3, r1; nopv
diff --git a/llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir b/llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir
index 33f918daf3ae..fcfab3e9b386 100644
--- a/llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir
+++ b/llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir
@@ -47,6 +47,18 @@ body: |
     $r1 = COPY $r0
 ...

+---
+name:            COPY_GPR_const
+alignment:       16
+body:             |
+  bb.0 (align 16):
+    ; CHECK-LABEL: name: COPY_GPR_const
+    ; CHECK: $r0 = MOV_RLC_imm11_pseudo 10
+    ; CHECK-NEXT: $r1 = MOV_RLC_imm11_pseudo 10
+    $r0 = MOV_RLC_imm11_pseudo 10
+    $r1 = COPY $r0
+...
+
 ---
 name:            COPY_non_GPR
 alignment:       16