From 8ff3f0b611027e08fc6fb2b0f5aeac9d1459ddbb Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 2 Apr 2025 16:24:57 -0700 Subject: [PATCH 1/6] [IR] "modular-format" attribute for functions using format strings A new InstCombine transform uses this attribute to rewrite calls to a modular version of the implementation along with llvm.reloc.none relocations against aspects of the implementation needed by the call. This change only adds support for the 'float' aspect, but it also builds the structure needed for others. See issue #146159 --- llvm/docs/LangRef.rst | 17 +++++ .../InstCombine/InstCombineCalls.cpp | 62 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index aba3a522bef31..b5e2cb8325ed7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2633,6 +2633,23 @@ For example: This attribute indicates that outlining passes should not modify the function. +``"modular_format"=",,,,"`` + This attribute indicates that the implementation is modular on a particular + format string argument . When the argument for a given call is constant, the + compiler may redirect the call to a modular implementation function + instead. + + The compiler also emits relocations to report various aspects of the format + string and arguments that were present. The compiler reports an aspect by + issing a relocation for the symbol `_``. This arranges + for code and data needed to support the aspect of the implementation to be + brought into the link to satisfy weak references in the modular + implemenation function. 
+ + The following aspects are currently supported: + + - ``float``: The call has a floating point argument + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 1b78acea62bd6..6cf6ead5e2219 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" @@ -3951,6 +3952,63 @@ Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { return visitCallBase(CBI); } +static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { + if (!CI->hasFnAttr("modular-format")) + return nullptr; + + SmallVector Args( + llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); + // TODO: Examine the format argument in Args[0]. 
+ // TODO: Error handling + unsigned FirstArgIdx; + if (!llvm::to_integer(Args[1], FirstArgIdx)) + return nullptr; + if (FirstArgIdx == 0) + return nullptr; + --FirstArgIdx; + StringRef FnName = Args[2]; + StringRef ImplName = Args[3]; + DenseSet Aspects(llvm::from_range, + ArrayRef(Args).drop_front(4)); + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionCallee ModularFn = + M->getOrInsertFunction(FnName, Callee->getFunctionType(), + Callee->getAttributes().removeFnAttribute( + M->getContext(), "modular-format")); + CallInst *New = cast(CI->clone()); + New->setCalledFunction(ModularFn); + New->removeFnAttr("modular-format"); + B.Insert(New); + + const auto ReferenceAspect = [&](StringRef Aspect) { + SmallString<20> Name = ImplName; + Name += '_'; + Name += Aspect; + Constant *Sym = + M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); + Function *RelocNoneFn = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); + B.CreateCall(RelocNoneFn, {Sym}); + }; + + if (Aspects.contains("float")) { + Aspects.erase("float"); + if (llvm::any_of( + llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx), + CI->arg_end()), + [](Value *V) { return V->getType()->isFloatingPointTy(); })) + ReferenceAspect("float"); + } + + SmallVector UnknownAspects(Aspects.begin(), Aspects.end()); + llvm::sort(UnknownAspects); + for (StringRef Request : UnknownAspects) + ReferenceAspect(Request); + + return New; +} + Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; @@ -3972,6 +4030,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { ++NumSimplified; return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With); } + if (Value *With = optimizeModularFormat(CI, Builder)) { + ++NumSimplified; + return CI->use_empty() ? 
CI : replaceInstUsesWith(*CI, With); + } return nullptr; } From c02868f95cfdd3ce0529d06408d56f91ac0be1f9 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 8 Jul 2025 15:11:42 -0700 Subject: [PATCH 2/6] issing -> issuing --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b5e2cb8325ed7..f8f9d702fa584 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2641,7 +2641,7 @@ For example: The compiler also emits relocations to report various aspects of the format string and arguments that were present. The compiler reports an aspect by - issing a relocation for the symbol `_``. This arranges + issuing a relocation for the symbol `_``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. From 7d985647bbbc83fb064be1d6a404eff2d183c713 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Mon, 21 Jul 2025 15:09:58 -0700 Subject: [PATCH 3/6] Emit reloc.none intrinsic with metadata string arg --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6cf6ead5e2219..7df5904e144f7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3971,11 +3971,12 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { DenseSet Aspects(llvm::from_range, ArrayRef(Args).drop_front(4)); Module *M = CI->getModule(); + LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); FunctionCallee ModularFn = M->getOrInsertFunction(FnName, Callee->getFunctionType(), Callee->getAttributes().removeFnAttribute( - M->getContext(), "modular-format")); + Ctx, "modular-format")); 
CallInst *New = cast(CI->clone()); New->setCalledFunction(ModularFn); New->removeFnAttr("modular-format"); @@ -3985,11 +3986,10 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallString<20> Name = ImplName; Name += '_'; Name += Aspect; - Constant *Sym = - M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); Function *RelocNoneFn = Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); - B.CreateCall(RelocNoneFn, {Sym}); + B.CreateCall(RelocNoneFn, + {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))}); }; if (Aspects.contains("float")) { From 8bb14e82616b32c99284af691d58ef6986a1c0bc Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:24:20 -0700 Subject: [PATCH 4/6] Correct modular_format to modular-format in docs --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f8f9d702fa584..1daeb8a12e772 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2633,7 +2633,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular_format"=",,,,"`` +``"modular-format"=",,,,"`` This attribute indicates that the implementation is modular on a particular format string argument . When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function From 23fca03c5961afda4193809687ca619b6d862bba Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:26:20 -0700 Subject: [PATCH 5/6] Describe the semantics of the arguments copied from C format attr --- llvm/docs/LangRef.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1daeb8a12e772..4ff803d9ab24b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2646,6 +2646,9 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. 
+ The first two arguments have the same semantics as the arguments to the C + ``format`` attribute. + The following aspects are currently supported: - ``float``: The call has a floating point argument From 4c26a6cd349417afeed89f248b4c08fd048e23e0 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:29:09 -0700 Subject: [PATCH 6/6] Add a type arg --- llvm/docs/LangRef.rst | 6 ++++-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 4ff803d9ab24b..6f705a183c312 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2633,7 +2633,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular-format"=",,,,"`` +``"modular-format"=",,,,,"`` This attribute indicates that the implementation is modular on a particular format string argument . When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function @@ -2646,13 +2646,15 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. - The first two arguments have the same semantics as the arguments to the C + The first three arguments have the same semantics as the arguments to the C ``format`` attribute. 
The following aspects are currently supported: - ``float``: The call has a floating point argument + + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7df5904e144f7..c94089d7f6ed4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3958,18 +3958,18 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallVector Args( llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); - // TODO: Examine the format argument in Args[0]. + // TODO: Make use of the first two arguments // TODO: Error handling unsigned FirstArgIdx; - if (!llvm::to_integer(Args[1], FirstArgIdx)) + if (!llvm::to_integer(Args[2], FirstArgIdx)) return nullptr; if (FirstArgIdx == 0) return nullptr; --FirstArgIdx; - StringRef FnName = Args[2]; - StringRef ImplName = Args[3]; + StringRef FnName = Args[3]; + StringRef ImplName = Args[4]; DenseSet Aspects(llvm::from_range, - ArrayRef(Args).drop_front(4)); + ArrayRef(Args).drop_front(5)); Module *M = CI->getModule(); LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction();