From 03d785ef3528dd6660b49d8f3fec3576b2b7c415 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 10 Jun 2025 14:06:53 -0700 Subject: [PATCH 1/5] [clang] "modular_format" attribute for functions using format strings This provides a C language version of the new IR modular-format attribute. This, in concert with the format attribute, allows a library function to declare that a modular version of its implementation is available. See issue #146159 for context. --- clang/include/clang/Basic/Attr.td | 11 +++++++++++ clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++ clang/lib/CodeGen/CGCall.cpp | 12 ++++++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 224cb6a32af28..79ffdf4663293 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5223,3 +5223,14 @@ def NonString : InheritableAttr { let Subjects = SubjectList<[Var, Field]>; let Documentation = [NonStringDocs]; } + +def ModularFormat : InheritableAttr { + let Spellings = [Clang<"modular_format">]; + let Args = [ + IdentifierArgument<"ModularImplFn">, + StringArgument<"ImplName">, + VariadicStringArgument<"Aspects"> + ]; + let Subjects = SubjectList<[Function]>; + let Documentation = [ModularFormatDocs]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 76747d2b11811..29de847efdd48 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9427,3 +9427,28 @@ silence diagnostics with code like: __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed }]; } + +def ModularFormatDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``modular_format`` attribute can be applied to a function that bears the +``format`` attribute to indicate that the implementation is modular on the +format 
string argument. When the format argument for a given call is constant, +the compiler may redirect the call to the symbol given as the first argument to +the attribute (the modular implementation function). + +The second argument is a implementation name, and the remaining arguments are +aspects of the format string for the compiler to report. If the compiler does +not understand a aspect, it must summarily report that the format string has +that aspect. + +The compiler reports an aspect by issing a relocation for the symbol +`<impl_name>_<aspect>``. This arranges for code and data needed to support the +aspect of the implementation to be brought into the link to satisfy weak +references in the modular implemenation function. + +The following aspects are currently supported: + +- ``float``: The call has a floating point argument + }]; +} diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index d9bd443455e0f..89da6a77f3b51 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2560,6 +2560,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); + + if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) { + // TODO: Error checking + FormatAttr *Format = TargetDecl->getAttr<FormatAttr>(); + std::string FormatIdx = std::to_string(Format->getFormatIdx()); + std::string FirstArg = std::to_string(Format->getFirstArg()); + SmallVector<StringRef> Args = { + FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + ModularFormat->getImplName()}; + llvm::append_range(Args, ModularFormat->aspects()); + FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); + } } // Attach "no-builtins" attributes to: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 16b18bcb6a2a0..8ae9559f0075a 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6745,6 +6745,29 @@ static void handleVTablePointerAuthentication(Sema &S, 
Decl *D, CustomDiscriminationValue)); } +static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef ImplName; + if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName)) + return; + SmallVector<StringRef> Aspects; + for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) { + StringRef Aspect; + if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect)) + return; + Aspects.push_back(Aspect); + } + + // Store aspects sorted and without duplicates. + llvm::sort(Aspects); + Aspects.erase(llvm::unique(Aspects), Aspects.end()); + + // TODO: Type checking on identifier + // TODO: Merge attributes + D->addAttr(::new (S.Context) ModularFormatAttr( + S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName, + Aspects.data(), Aspects.size())); +} + //===----------------------------------------------------------------------===// // Top Level Sema Entry Points //===----------------------------------------------------------------------===// @@ -7669,6 +7692,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_VTablePointerAuthentication: handleVTablePointerAuthentication(S, D, AL); break; + + case ParsedAttr::AT_ModularFormat: + handleModularFormat(S, D, AL); + break; } } From 16de3c1c1aaaa6d14ed9a061045ea14cfa527d44 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 15 Jul 2025 11:28:20 -0700 Subject: [PATCH 2/5] Update docs to account for clang inferring format attribute --- clang/include/clang/Basic/AttrDocs.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 29de847efdd48..2ccbfe9aed2a2 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9432,10 +9432,11 @@ def ModularFormatDocs : Documentation { let Category = DocCatFunction; let Content = [{ The ``modular_format`` attribute can be applied to a function that bears the -``format`` attribute to 
indicate that the implementation is modular on the -format string argument. When the format argument for a given call is constant, -the compiler may redirect the call to the symbol given as the first argument to -the attribute (the modular implementation function). +``format`` attribute (or standard library functions) to indicate that the +implementation is modular on the format string argument. When the format string +for a given call is constant, the compiler may redirect the call to the symbol +given as the first argument to the attribute (the modular implementation +function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does From ffd5658917a6e9a4ff1c55e810c6a6091182f9ec Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 16 Jul 2025 15:19:37 -0700 Subject: [PATCH 3/5] Add an example to clang attr doc --- clang/include/clang/Basic/AttrDocs.td | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2ccbfe9aed2a2..2bd8190c0c2b6 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9444,10 +9444,18 @@ not understand a aspect, it must summarily report that the format string has that aspect. The compiler reports an aspect by issing a relocation for the symbol -`<impl_name>_<aspect>``. This arranges for code and data needed to support the +``<impl_name>_<aspect>``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. +For example, say ``printf`` is annotated with +``modular_format(__modular_printf, __printf, float)``. Then, a call to +``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would +become a call to ``__modular_printf`` with the same arguments, as would +``printf("%f", 42.0)``. 
The latter would be accompanied with a strong +relocation against the symbol ``__printf_float``, which would bring floating +point support for ``printf`` into the link. + The following aspects are currently supported: - ``float``: The call has a floating point argument From 6bf7051498bff7092c9687229c8013e8961dcc57 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 13:35:46 -0700 Subject: [PATCH 4/5] Emit the new type arg from format attr --- clang/lib/CodeGen/CGCall.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 89da6a77f3b51..d8fe35ce12737 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2564,10 +2564,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) { // TODO: Error checking FormatAttr *Format = TargetDecl->getAttr<FormatAttr>(); + StringRef Type = Format->getType()->getName(); std::string FormatIdx = std::to_string(Format->getFormatIdx()); std::string FirstArg = std::to_string(Format->getFirstArg()); SmallVector<StringRef> Args = { - FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + Type, FormatIdx, FirstArg, + ModularFormat->getModularImplFn()->getName(), ModularFormat->getImplName()}; llvm::append_range(Args, ModularFormat->aspects()); FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); From 0ad11ef6716ea20f471f811899ad5af55b0b9ac1 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 22 Jul 2025 15:01:56 -0700 Subject: [PATCH 5/5] Correct typos --- clang/include/clang/Basic/AttrDocs.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2bd8190c0c2b6..41adf44854205 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9440,10 +9440,10 @@ function). 
The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does -not understand a aspect, it must summarily report that the format string has +not understand an aspect, it must summarily report that the format string has that aspect. -The compiler reports an aspect by issing a relocation for the symbol +The compiler reports an aspect by issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function.