From cebc746dac3e33dca1a91bb4d801268faf6c33cf Mon Sep 17 00:00:00 2001 From: Konstantin Schwarz Date: Wed, 24 Sep 2025 16:54:47 -0600 Subject: [PATCH] [AIE2P] Add alternative calling convention for library calls In AIE's default calling convention, all vector registers are caller-saved. However, most compiler-rt builtin library functions do not use vector registers. To avoid having to spill vectors to memory in the caller function, we can attach a different calling convention to these library functions. --- llvm/include/llvm/IR/CallingConv.h | 3 +++ llvm/lib/Target/AIE/AIEBaseISelLowering.cpp | 1 + llvm/lib/Target/AIE/aie2p/AIE2PCallingConv.td | 7 ++++++ .../Target/AIE/aie2p/AIE2PISelLowering.cpp | 25 +++++++++++++++++++ .../Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 10 ++++++-- .../CodeGen/AIE/GlobalISel/legalize-sdiv.mir | 2 +- .../AIE/GlobalISel/legalize-sdivrem.mir | 2 +- .../AIE/GlobalISel/legalize-sitofp.mir | 2 +- .../CodeGen/AIE/GlobalISel/legalize-srem.mir | 2 +- .../CodeGen/AIE/GlobalISel/legalize-udiv.mir | 2 +- .../AIE/GlobalISel/legalize-udivrem.mir | 2 +- .../CodeGen/AIE/GlobalISel/legalize-urem.mir | 2 +- llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll | 20 +++++++-------- 13 files changed, 61 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index 55e32028e3ed..3eea909f33a8 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -270,6 +270,9 @@ namespace CallingConv { /// Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15. AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 = 111, + /// Preserve vector registers. + AIE_PreserveAll_Vec = 112, + /// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023 }; diff --git a/llvm/lib/Target/AIE/AIEBaseISelLowering.cpp b/llvm/lib/Target/AIE/AIEBaseISelLowering.cpp index 21ab231e875d..63c11186b07d 100644 --- a/llvm/lib/Target/AIE/AIEBaseISelLowering.cpp +++ b/llvm/lib/Target/AIE/AIEBaseISelLowering.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/AIE2MCTargetDesc.h" #include "MCTargetDesc/AIEMCTargetDesc.h" #include "MCTargetDesc/aie2p/AIE2PMCTargetDesc.h" +#include "llvm/IR/RuntimeLibcalls.h" #include "llvm/MC/MCRegister.h" using namespace llvm; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PCallingConv.td b/llvm/lib/Target/AIE/aie2p/AIE2PCallingConv.td index 5905a629ce12..b7b0d86b42cf 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PCallingConv.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PCallingConv.td @@ -219,5 +219,12 @@ def CC_AIE2P : CallingConv<[ def CSR_AIE2P : CalleeSavedRegs<(add lr, r8, r9, r10, r11, r12, r13, r14, r15, p6, p7)>; +def CSR_AIE2P_Vec + : CalleeSavedRegs<(add lr, r8, r9, r10, r11, r12, r13, r14, r15, p6, p7, + wl0, wl2, wl4, wl6, wl8, wl10, wl1, wl3, wl5, wl7, wl9, wl11, wh0, + wh2, wh4, wh6, wh8, wh10, wh1, wh3, wh5, wh7, wh9, wh11, bmll0, bmll1, + bmll2, bmll3, bmll4, bmhl0, bmhl1, bmhl2, bmhl3, bmhl4, bmlh0, bmlh1, + bmlh2, bmlh3, bmlh4, bmhh0, bmhh1, bmhh2, bmhh3, bmhh4)>; + // Needed for implementation of AIERegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PISelLowering.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PISelLowering.cpp index b6edef762185..fe602056e71c 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PISelLowering.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PISelLowering.cpp @@ -22,9 +22,34 @@ using namespace llvm; #define DEBUG_TYPE "aie-lower" +cl::opt + VecCCLibcalls("aie-libcalls-preserve-vectors", cl::init(true), cl::Hidden, + cl::desc("Assume all vector registers are callee-saved by " + "builtin library functions.")); + AIE2PTargetLowering::AIE2PTargetLowering(const TargetMachine &TM, const AIEBaseSubtarget &STI) : 
AIEBaseTargetLowering(TM, STI) { + + if (VecCCLibcalls) { + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, + CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, + CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, + CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, + CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SREM_I32, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::UREM_I32, CallingConv::AIE_PreserveAll_Vec); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::AIE_PreserveAll_Vec); + } + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // We already define in .td which types are legal for each register class. 
diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index dce18a4e6ff6..20dd6f715dcd 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" #define GET_REGINFO_TARGET_DESC @@ -279,8 +280,13 @@ AIE2PRegisterInfo::getPointerRegClass(const MachineFunction &MF, const uint32_t * AIE2PRegisterInfo::getCallPreservedMask(const MachineFunction &MF, - CallingConv::ID /*CC*/) const { - return CSR_AIE2P_RegMask; + CallingConv::ID CC) const { + switch (CC) { + case CallingConv::AIE_PreserveAll_Vec: + return CSR_AIE2P_Vec_RegMask; + default: + return CSR_AIE2P_RegMask; + } } bool AIE2PRegisterInfo::isTypeLegalForClass(const TargetRegisterClass &RC, diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdiv.mir index bef369ffe1e7..d3e05dbb459b 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdiv.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: sdiv_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdivrem.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdivrem.mir index d15a04881f2f..5e2298f84e7b 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdivrem.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sdivrem.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: sdivrem_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sitofp.mir index d40a73610a28..af3253b0ad8b 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-sitofp.mir @@ -4,7 +4,7 @@ # # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 --check-prefix=COMMON --check-prefix=AIE2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p --check-prefix=COMMON --check-prefix=AIE2P %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec --check-prefix=COMMON --check-prefix=AIE2P %s --- name: test_sitofp_s32_to_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-srem.mir index 688c4828eadf..8d3ef7c06b32 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-srem.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: srem_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-udiv.mir index 4e8aa57ec2be..e46a47a0f000 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-udiv.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: udiv_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-udivrem.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-udivrem.mir index dc8b0f77cabd..f305fa1ce04c 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-udivrem.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-udivrem.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: udivrem_s32 diff --git a/llvm/test/CodeGen/AIE/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AIE/GlobalISel/legalize-urem.mir index 09c599df62d6..015a88780ec5 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/legalize-urem.mir @@ -6,7 +6,7 @@ # (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates # RUN: llc -mtriple aie -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=1 %s # RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2 %s -# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p %s +# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck -DVER=2p_vec %s --- name: urem_s32 diff --git a/llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll b/llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll index 14eb00b68c9a..127a1049c14f 100644 --- a/llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll +++ b/llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll @@ -520,24 +520,24 @@ entry: define dso_local noundef <16 x float> @_Z13test_upd_elemDv16_fif(<16 x float> noundef %v, i32 noundef %idx, float noundef %b) local_unnamed_addr { ; CHECK-LABEL: _Z13test_upd_elemDv16_fif: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: nopa ; jl #__floatsisf -; CHECK-NEXT: paddxm [sp], #128 // Delay Slot 5 -; CHECK-NEXT: st r8, [sp, #-128] // 4-byte Folded Spill Delay Slot 4 -; CHECK-NEXT: st lr, [sp, #-124] // 4-byte Folded Spill Delay Slot 3 -; CHECK-NEXT: vst x2, [sp, #-64] // 64-byte Folded Spill Delay Slot 2 +; CHECK-NEXT: nopa ; nopb ; jl #__floatsisf +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: paddxm [sp], #64 // Delay Slot 4 +; CHECK-NEXT: st lr, [sp, #-60] // 4-byte Folded Spill Delay Slot 3 +; CHECK-NEXT: st r8, [sp, #-64] // 4-byte Folded Spill Delay Slot 2 ; CHECK-NEXT: mov r8, r0 // Delay Slot 1 -; CHECK-NEXT: lda lr, [sp, #-124]; nopxm // 4-byte Folded Reload +; CHECK-NEXT: lda lr, [sp, #-60]; nopb ; nopxm // 4-byte Folded Reload +; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: vlda x0, [sp, #-64] // 64-byte Folded Reload ; CHECK-NEXT: nop -; CHECK-NEXT: lda r8, [sp, #-128] // 4-byte Folded Reload +; CHECK-NEXT: lda r8, [sp, #-64] // 4-byte Folded Reload ; CHECK-NEXT: ret lr ; CHECK-NEXT: nop // Delay Slot 5 ; CHECK-NEXT: mov 
r29, r8 // Delay Slot 4 -; CHECK-NEXT: paddxm [sp], #-128 // Delay Slot 3 -; CHECK-NEXT: vinsert.32 x0, x0, r29, r0 // Delay Slot 2 +; CHECK-NEXT: paddxm [sp], #-64 // Delay Slot 3 +; CHECK-NEXT: vinsert.32 x0, x2, r29, r0 // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: %0 = bitcast float %b to i32