Skip to content

Commit 921c6db

Browse files
authored
[llvm] Introduce callee_type metadata
Introduce `callee_type` metadata which will be attached to the indirect call instructions. The `callee_type` metadata will be used to generate `.callgraph` section described in this RFC: https://lists.llvm.org/pipermail/llvm-dev/2021-July/151739.html Reviewers: morehouse, petrhosek, nikic, ilovepi Reviewed By: nikic, ilovepi Pull Request: #87573
1 parent 6d8e53d commit 921c6db

File tree

15 files changed

+405
-0
lines changed

15 files changed

+405
-0
lines changed

llvm/docs/CalleeTypeMetadata.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
====================
2+
Callee Type Metadata
3+
====================
4+
5+
Introduction
6+
============
7+
The ``!callee_type`` metadata is introduced to support the generation of a call graph
8+
section in the object file. The ``!callee_type`` metadata is used
9+
to identify the types of the intended callees of indirect call instructions. The ``!callee_type`` metadata is a
10+
list of one or more generalized ``!type`` metadata objects (see :doc:`TypeMetadata`), with each ``!type``
11+
metadata pointing to a callee's :ref:`type identifier <calleetype-type-identifier>`.
12+
LLVM's `Control Flow Integrity (CFI)`_ also uses the ``!type`` metadata in its implementation.
13+
14+
.. _Control Flow Integrity (CFI): https://clang.llvm.org/docs/ControlFlowIntegrity.html
15+
16+
.. _calleetype-type-identifier:
17+
18+
Type identifier
19+
================
20+
21+
The type for an indirect call target is the callee's function signature.
22+
Mapping from a type to an identifier is an ABI detail.
23+
In the current implementation, an identifier of type T is
24+
computed as follows:
25+
26+
- Obtain the generalized mangled name for “typeinfo name for T”.
27+
- Compute the MD5 hash of the name as a string.
28+
- Reinterpret the first 8 bytes of the hash as a little-endian 64-bit integer.
29+
30+
To avoid mismatched pointer types, generalizations are applied.
31+
Pointers in return and argument types are treated as equivalent as long as the qualifiers for the
32+
type they point to match. For example, ``char*``, ``char**``, and ``int*`` are considered equivalent
33+
types. However, ``char*`` and ``const char*`` are considered distinct types.

llvm/docs/LangRef.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8171,6 +8171,11 @@ change in the future.
81718171

81728172
See :doc:`TypeMetadata`.
81738173

8174+
'``callee_type``' Metadata
8175+
^^^^^^^^^^^^^^^^^^^^^^^^^^
8176+
8177+
See :doc:`CalleeTypeMetadata`.
8178+
81748179
'``associated``' Metadata
81758180
^^^^^^^^^^^^^^^^^^^^^^^^^
81768181

llvm/docs/Reference.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ LLVM and API reference documentation.
1414
BlockFrequencyTerminology
1515
BranchWeightMetadata
1616
Bugpoint
17+
CalleeTypeMetadata
1718
CIBestPractices
1819
CommandGuide/index
1920
ConvergenceAndUniformity

llvm/include/llvm/IR/FixedMetadataKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,4 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
5353
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
5454
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
5555
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
56+
LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42)

llvm/include/llvm/IR/Metadata.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,13 @@ class MDNode : public Metadata {
12551255
bool isReplaceable() const { return isTemporary() || isAlwaysReplaceable(); }
12561256
bool isAlwaysReplaceable() const { return getMetadataID() == DIAssignIDKind; }
12571257

1258+
/// Check if this is a valid generalized type metadata node.
1259+
bool hasGeneralizedMDString() {
1260+
if (getNumOperands() < 2 || !isa<MDString>(getOperand(1)))
1261+
return false;
1262+
return cast<MDString>(getOperand(1))->getString().ends_with(".generalized");
1263+
}
1264+
12581265
unsigned getNumTemporaryUses() const {
12591266
assert(isTemporary() && "Only for temporaries");
12601267
return Context.getReplaceableUses()->getNumUses();
@@ -1467,6 +1474,8 @@ class MDNode : public Metadata {
14671474
const Instruction *BInstr);
14681475
LLVM_ABI static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
14691476
LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
1477+
LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A,
1478+
const MDNode *B);
14701479
};
14711480

14721481
/// Tuple of metadata.

llvm/lib/IR/Metadata.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,24 @@ static void addRange(SmallVectorImpl<ConstantInt *> &EndPoints,
13031303
EndPoints.push_back(High);
13041304
}
13051305

1306+
MDNode *MDNode::getMergedCalleeTypeMetadata(const MDNode *A, const MDNode *B) {
1307+
// Drop the callee_type metadata if either of the call instructions do not
1308+
// have it.
1309+
if (!A || !B)
1310+
return nullptr;
1311+
SmallVector<Metadata *, 8> AB;
1312+
SmallPtrSet<Metadata *, 8> MergedCallees;
1313+
auto AddUniqueCallees = [&AB, &MergedCallees](const MDNode *N) {
1314+
for (Metadata *MD : N->operands()) {
1315+
if (MergedCallees.insert(MD).second)
1316+
AB.push_back(MD);
1317+
}
1318+
};
1319+
AddUniqueCallees(A);
1320+
AddUniqueCallees(B);
1321+
return MDNode::get(A->getContext(), AB);
1322+
}
1323+
13061324
MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
13071325
// Given two ranges, we want to compute the union of the ranges. This
13081326
// is slightly complicated by having to combine the intervals and merge

llvm/lib/IR/Verifier.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
531531
void visitCallStackMetadata(MDNode *MD);
532532
void visitMemProfMetadata(Instruction &I, MDNode *MD);
533533
void visitCallsiteMetadata(Instruction &I, MDNode *MD);
534+
void visitCalleeTypeMetadata(Instruction &I, MDNode *MD);
534535
void visitDIAssignIDMetadata(Instruction &I, MDNode *MD);
535536
void visitMMRAMetadata(Instruction &I, MDNode *MD);
536537
void visitAnnotationMetadata(MDNode *Annotation);
@@ -5193,6 +5194,33 @@ void Verifier::visitCallsiteMetadata(Instruction &I, MDNode *MD) {
51935194
visitCallStackMetadata(MD);
51945195
}
51955196

5197+
static inline bool isConstantIntMetadataOperand(const Metadata *MD) {
5198+
if (auto *VAL = dyn_cast<ValueAsMetadata>(MD))
5199+
return isa<ConstantInt>(VAL->getValue());
5200+
return false;
5201+
}
5202+
5203+
void Verifier::visitCalleeTypeMetadata(Instruction &I, MDNode *MD) {
5204+
Check(isa<CallBase>(I), "!callee_type metadata should only exist on calls",
5205+
&I);
5206+
for (Metadata *Op : MD->operands()) {
5207+
Check(isa<MDNode>(Op),
5208+
"The callee_type metadata must be a list of type metadata nodes", Op);
5209+
auto *TypeMD = cast<MDNode>(Op);
5210+
Check(TypeMD->getNumOperands() == 2,
5211+
"Well-formed generalized type metadata must contain exactly two "
5212+
"operands",
5213+
Op);
5214+
Check(isConstantIntMetadataOperand(TypeMD->getOperand(0)) &&
5215+
mdconst::extract<ConstantInt>(TypeMD->getOperand(0))->isZero(),
5216+
"The first operand of type metadata for functions must be zero", Op);
5217+
Check(TypeMD->hasGeneralizedMDString(),
5218+
"Only generalized type metadata can be part of the callee_type "
5219+
"metadata list",
5220+
Op);
5221+
}
5222+
}
5223+
51965224
void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
51975225
Check(isa<MDTuple>(Annotation), "annotation must be a tuple");
51985226
Check(Annotation->getNumOperands() >= 1,
@@ -5470,6 +5498,9 @@ void Verifier::visitInstruction(Instruction &I) {
54705498
if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite))
54715499
visitCallsiteMetadata(I, MD);
54725500

5501+
if (MDNode *MD = I.getMetadata(LLVMContext::MD_callee_type))
5502+
visitCalleeTypeMetadata(I, MD);
5503+
54735504
if (MDNode *MD = I.getMetadata(LLVMContext::MD_DIAssignID))
54745505
visitDIAssignIDMetadata(I, MD);
54755506

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4352,6 +4352,13 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
43524352
Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
43534353
}
43544354

4355+
// Drop unnecessary callee_type metadata from calls that were converted
4356+
// into direct calls.
4357+
if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4358+
Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4359+
Changed = true;
4360+
}
4361+
43554362
// Drop unnecessary kcfi operand bundles from calls that were converted
43564363
// into direct calls.
43574364
auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3005,6 +3005,12 @@ static void combineMetadata(Instruction *K, const Instruction *J,
30053005
case LLVMContext::MD_memprof:
30063006
case LLVMContext::MD_callsite:
30073007
break;
3008+
case LLVMContext::MD_callee_type:
3009+
if (!AAOnly) {
3010+
K->setMetadata(LLVMContext::MD_callee_type,
3011+
MDNode::getMergedCalleeTypeMetadata(KMD, JMD));
3012+
}
3013+
break;
30083014
case LLVMContext::MD_align:
30093015
if (!AAOnly && (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)))
30103016
K->setMetadata(

llvm/lib/Transforms/Utils/ValueMapper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,13 @@ void Mapper::remapInstruction(Instruction *I) {
987987
"Referenced value not in value map!");
988988
}
989989

990+
// Drop callee_type metadata from calls that were remapped
991+
// into a direct call from an indirect one.
992+
if (auto *CB = dyn_cast<CallBase>(I)) {
993+
if (CB->getMetadata(LLVMContext::MD_callee_type) && !CB->isIndirectCall())
994+
CB->setMetadata(LLVMContext::MD_callee_type, nullptr);
995+
}
996+
990997
// Remap phi nodes' incoming blocks.
991998
if (PHINode *PN = dyn_cast<PHINode>(I)) {
992999
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {

0 commit comments

Comments
 (0)