Skip to content

Commit e6d9542

Browse files
authored
[X86][Inline] Check correct function for target feature check (#152515)
The check for ABI differences when inlining involves three functions: the caller, the callee being inlined, and any nested callee that is called from inside the callee. Before inlining, the ABI of such a nested call is determined by the target features of the callee; after inlining, it is determined by the target features of the caller. The target features of the nested callee itself should never actually matter.
1 parent 86ac834 commit e6d9542

File tree

2 files changed

+36
-20
lines changed

2 files changed

+36
-20
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6526,8 +6526,8 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
65266526

65276527
for (const Instruction &I : instructions(Callee)) {
65286528
if (const auto *CB = dyn_cast<CallBase>(&I)) {
6529-
// Having more target features is fine for inline ASM.
6530-
if (CB->isInlineAsm())
6529+
// Having more target features is fine for inline ASM and intrinsics.
6530+
if (CB->isInlineAsm() || CB->getIntrinsicID() != Intrinsic::not_intrinsic)
65316531
continue;
65326532

65336533
SmallVector<Type *, 8> Types;
@@ -6543,19 +6543,9 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
65436543
if (all_of(Types, IsSimpleTy))
65446544
continue;
65456545

6546-
if (Function *NestedCallee = CB->getCalledFunction()) {
6547-
// Assume that intrinsics are always ABI compatible.
6548-
if (NestedCallee->isIntrinsic())
6549-
continue;
6550-
6551-
// Do a precise compatibility check.
6552-
if (!areTypesABICompatible(Caller, NestedCallee, Types))
6553-
return false;
6554-
} else {
6555-
// We don't know the target features of the callee,
6556-
// assume it is incompatible.
6546+
// Do a precise compatibility check.
6547+
if (!areTypesABICompatible(Caller, Callee, Types))
65576548
return false;
6558-
}
65596549
}
65606550
}
65616551
return true;

llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ define i64 @callee_not_avx(<4 x i64> %arg) noinline {
3434
ret i64 %v
3535
}
3636

37-
; This call also shouldn't be inlined, as we don't know whether callee_unknown
38-
; is ABI compatible or not.
37+
; This call also shouldn't be inlined, as caller_not_avx2 is not ABI compatible.
3938
define void @caller_avx2() "target-features"="+avx" {
4039
; CHECK-LABEL: define {{[^@]+}}@caller_avx2
4140
; CHECK-SAME: () #[[ATTR0]] {
@@ -55,27 +54,54 @@ define internal void @caller_not_avx2() {
5554
ret void
5655
}
5756

57+
; Should be inlined, as caller_avx7 is ABI compatible. The fact that we don't
58+
; know anything about callee_unknown doesn't matter, as it is the caller that
59+
; determines the ABI as far as target features are concerned.
60+
define void @caller_avx6() "target-features"="+avx" {
61+
; CHECK-LABEL: define {{[^@]+}}@caller_avx6
62+
; CHECK-SAME: () #[[ATTR0]] {
63+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
64+
; CHECK-NEXT: ret void
65+
;
66+
call void @caller_avx7()
67+
ret void
68+
}
69+
70+
define void @caller_avx7() "target-features"="+avx" {
71+
; CHECK-LABEL: define {{[^@]+}}@caller_avx7
72+
; CHECK-SAME: () #[[ATTR0]] {
73+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
74+
; CHECK-NEXT: ret void
75+
;
76+
call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
77+
ret void
78+
}
79+
5880
declare i64 @callee_unknown(<4 x i64>)
5981

6082
; This call should get inlined, because we assume that intrinsics are always
6183
; ABI compatible.
6284
define void @caller_avx3() "target-features"="+avx" {
6385
; CHECK-LABEL: define {{[^@]+}}@caller_avx3
6486
; CHECK-SAME: () #[[ATTR0]] {
65-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.some_intrinsic(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
87+
; CHECK-NEXT: [[V_I:%.*]] = load <4 x i64>, ptr @g, align 32
88+
; CHECK-NEXT: [[V2_I:%.*]] = call <4 x i64> @llvm.abs.v4i64(<4 x i64> [[V_I]], i1 false)
89+
; CHECK-NEXT: store <4 x i64> [[V2_I]], ptr @g, align 32
6690
; CHECK-NEXT: ret void
6791
;
6892
call void @caller_not_avx3()
6993
ret void
7094
}
7195

96+
@g = external global <4 x i64>
97+
7298
define internal void @caller_not_avx3() {
73-
call i64 @llvm.some_intrinsic(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
99+
%v = load <4 x i64>, ptr @g
100+
%v2 = call <4 x i64> @llvm.abs(<4 x i64> %v, i1 false)
101+
store <4 x i64> %v2, ptr @g
74102
ret void
75103
}
76104

77-
declare i64 @llvm.some_intrinsic(<4 x i64>)
78-
79105
; This call should get inlined, because only simple types are involved.
80106
define void @caller_avx4() "target-features"="+avx" {
81107
; CHECK-LABEL: define {{[^@]+}}@caller_avx4

0 commit comments

Comments
 (0)