Skip to content

Commit f03ffb1

Browse files
authored
Merge branch 'main' into reland-enable-masked-interleave
2 parents d719a96 + 1194353 commit f03ffb1

File tree

45 files changed

+1012
-485
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1012
-485
lines changed

clang/docs/InternalsManual.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ wording a diagnostic.
139139
you mean %1?``.
140140

141141
* Appropriately capitalize proper nouns like ``Clang``, ``OpenCL``, ``GCC``,
142-
``Objective-C``, etc and language standard versions like ``C11`` or ``C++11``.
142+
``Objective-C``, etc. and language standard versions like ``C11`` or ``C++11``.
143143
* The wording should be succinct. If necessary, use a semicolon to combine
144144
sentence fragments instead of using complete sentences. e.g., prefer wording
145145
like ``'%0' is deprecated; it will be removed in a future release of Clang``
@@ -886,7 +886,7 @@ a string that the tablegen backend uses as a prefix to the
886886
LANG_OPTION_WITH_MARSHALLING([...], LangOpts->IgnoreExceptions, [...])
887887
#endif // LANG_OPTION_WITH_MARSHALLING
888888

889-
Such definition can be used used in the function for parsing and generating
889+
Such definition can be used in the function for parsing and generating
890890
command line:
891891

892892
.. code-block:: c++
@@ -1745,7 +1745,7 @@ will be found by the lookup, since it effectively replaces the first
17451745
declaration of "``f``".
17461746

17471747
(Note that because ``f`` can be redeclared at block scope, or in a friend
1748-
declaration, etc. it is possible that the declaration of ``f`` found by name
1748+
declaration, etc., it is possible that the declaration of ``f`` found by name
17491749
lookup will not be the most recent one.)
17501750

17511751
In the semantics-centric view, overloading of functions is represented
@@ -1945,7 +1945,7 @@ range of iterators over declarations of "``f``".
19451945
function ``DeclContext::getPrimaryContext`` retrieves the "primary" context for
19461946
a given ``DeclContext`` instance, which is the ``DeclContext`` responsible for
19471947
maintaining the lookup table used for the semantics-centric view. Given a
1948-
DeclContext, one can obtain the set of declaration contexts that are
1948+
``DeclContext``, one can obtain the set of declaration contexts that are
19491949
semantically connected to this declaration context, in source order, including
19501950
this context (which will be the only result, for non-namespace contexts) via
19511951
``DeclContext::collectAllContexts``. Note that these functions are used
@@ -1985,7 +1985,7 @@ broken code in the AST:
19851985
errors, the Decl node is marked as invalid.
19861986
- dropping invalid node: this often happens for errors that we don’t have
19871987
graceful recovery. Prior to Recovery AST, a mismatched-argument function call
1988-
expression was dropped though a CallExpr was created for semantic analysis.
1988+
expression was dropped though a ``CallExpr`` was created for semantic analysis.
19891989

19901990
With these strategies, clang surfaces better diagnostics, and provides AST
19911991
consumers a rich AST reflecting the written source code as much as possible even

clang/lib/Analysis/RetainSummaryManager.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,7 @@ static bool isSubclass(const Decl *D,
147147

148148
static bool isExactClass(const Decl *D, StringRef ClassName) {
149149
using namespace ast_matchers;
150-
DeclarationMatcher sameClassM =
151-
cxxRecordDecl(hasName(std::string(ClassName)));
150+
DeclarationMatcher sameClassM = cxxRecordDecl(hasName(ClassName));
152151
return !(match(sameClassM, *D, D->getASTContext()).empty());
153152
}
154153

lld/ELF/Arch/LoongArch.cpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
4646
private:
4747
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4848
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
49+
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
50+
const Relocation &rLo12, uint64_t secAddr) const;
4951
};
5052
} // end anonymous namespace
5153

@@ -1155,6 +1157,78 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
11551157
}
11561158
}
11571159

1160+
// Try GOT indirection to PC relative optimization.
1161+
// From:
1162+
// * pcalau12i $a0, %got_pc_hi20(sym_got)
1163+
// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
1164+
// To:
1165+
// * pcalau12i $a0, %pc_hi20(sym)
1166+
// * addi.w/d $a0, $a0, %pc_lo12(sym)
1167+
//
1168+
// Note: Although the optimization has been performed, the GOT entries still
1169+
// exist, similarly to AArch64. Eliminating the entries will increase code
1170+
// complexity.
1171+
bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
1172+
const Relocation &rLo12, uint64_t secAddr) const {
1173+
// Check if the relocations apply to consecutive instructions.
1174+
if (rHi20.offset + 4 != rLo12.offset)
1175+
return false;
1176+
1177+
// Check if the relocations reference the same symbol and skip undefined,
1178+
// preemptible and STT_GNU_IFUNC symbols.
1179+
if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym->isDefined() ||
1180+
rHi20.sym->isPreemptible || rHi20.sym->isGnuIFunc())
1181+
return false;
1182+
1183+
// GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
1184+
// in position-independent code because these instructions produce a relative
1185+
// address.
1186+
if ((ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
1187+
return false;
1188+
1189+
// Check if the addends of both relocations are zero.
1190+
if (rHi20.addend != 0 || rLo12.addend != 0)
1191+
return false;
1192+
1193+
const uint32_t currInsn = read32le(loc);
1194+
const uint32_t nextInsn = read32le(loc + 4);
1195+
const uint32_t ldOpcode = ctx.arg.is64 ? LD_D : LD_W;
1196+
// Check if the first instruction is PCALAU12I and the second instruction is
1197+
// LD.
1198+
if ((currInsn & 0xfe000000) != PCALAU12I ||
1199+
(nextInsn & 0xffc00000) != ldOpcode)
1200+
return false;
1201+
1202+
// Check if both instructions use the same register.
1203+
if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
1204+
return false;
1205+
1206+
Symbol &sym = *rHi20.sym;
1207+
uint64_t symLocal = sym.getVA(ctx);
1208+
const int64_t displace = symLocal - getLoongArchPage(secAddr + rHi20.offset);
1209+
// Check if the symbol address is in
1210+
// [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
1211+
const int64_t underflow = -0x80000000LL - 0x800;
1212+
const int64_t overflow = 0x80000000LL - 0x800;
1213+
if (!(displace >= underflow && displace < overflow))
1214+
return false;
1215+
1216+
Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
1217+
rHi20.addend, &sym};
1218+
Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
1219+
&sym};
1220+
uint64_t pageDelta =
1221+
getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
1222+
// pcalau12i $a0, %pc_hi20
1223+
write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
1224+
relocate(loc, newRHi20, pageDelta);
1225+
// addi.w/d $a0, $a0, %pc_lo12
1226+
write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
1227+
getJ5(nextInsn), 0));
1228+
relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
1229+
return true;
1230+
}
1231+
11581232
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
11591233
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
11601234
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1172,6 +1246,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
11721246
return expr;
11731247
}
11741248

1249+
static bool pairForGotRels(ArrayRef<Relocation> relocs) {
1250+
// Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
1251+
// pairs.
1252+
size_t i = 0;
1253+
const size_t size = relocs.size();
1254+
for (; i != size; ++i) {
1255+
if (relocs[i].type == R_LARCH_GOT_PC_HI20) {
1256+
if (i + 1 < size && relocs[i + 1].type == R_LARCH_GOT_PC_LO12) {
1257+
++i;
1258+
continue;
1259+
}
1260+
if (relaxable(relocs, i) && i + 2 < size &&
1261+
relocs[i + 2].type == R_LARCH_GOT_PC_LO12) {
1262+
i += 2;
1263+
continue;
1264+
}
1265+
break;
1266+
} else if (relocs[i].type == R_LARCH_GOT_PC_LO12) {
1267+
break;
1268+
}
1269+
}
1270+
return i == size;
1271+
}
1272+
11751273
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
11761274
const unsigned bits = ctx.arg.is64 ? 64 : 32;
11771275
uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1181,6 +1279,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
11811279
secAddr += ehIn->getParent()->outSecOff;
11821280
bool isExtreme = false, isRelax = false;
11831281
const MutableArrayRef<Relocation> relocs = sec.relocs();
1282+
const bool isPairForGotRels = pairForGotRels(relocs);
11841283
for (size_t i = 0, size = relocs.size(); i != size; ++i) {
11851284
Relocation &rel = relocs[i];
11861285
uint8_t *loc = buf + rel.offset;
@@ -1264,6 +1363,24 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
12641363
tlsdescToLe(loc, rel, val);
12651364
}
12661365
continue;
1366+
case RE_LOONGARCH_GOT_PAGE_PC:
1367+
// In LoongArch, we try GOT indirection to PC relative optimization in
1368+
// normal or medium code model, whether or not with R_LARCH_RELAX
1369+
// relocation. Moreover, if the original code sequence can be relaxed to a
1370+
// single instruction `pcaddi`, the first instruction will be removed and
1371+
// it will not reach here.
1372+
if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) {
1373+
bool isRelax = relaxable(relocs, i);
1374+
const Relocation lo12Rel = isRelax ? relocs[i + 2] : relocs[i + 1];
1375+
if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
1376+
tryGotToPCRel(loc, rel, lo12Rel, secAddr)) {
1377+
// isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12
1378+
// !isRelax: skip relocation R_LARCH_GOT_PC_LO12
1379+
i += isRelax ? 2 : 1;
1380+
continue;
1381+
}
1382+
}
1383+
break;
12671384
default:
12681385
break;
12691386
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# REQUIRES: loongarch
2+
# RUN: rm -rf %t && split-file %s %t && cd %t
3+
4+
# RUN: llvm-mc --filetype=obj --triple=loongarch64 a.s -o a.o
5+
# RUN: llvm-mc --filetype=obj --triple=loongarch64 unpaired.s -o unpaired.o
6+
# RUN: llvm-mc --filetype=obj --triple=loongarch64 lone-ldr.s -o lone-ldr.o
7+
8+
# RUN: ld.lld a.o -T within-range.t -o a
9+
# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
10+
11+
## This test verifies the encoding when the register $a0 is used.
12+
# CHECK: pcalau12i $a0, 0
13+
# CHECK-NEXT: addi.d $a0, $a0, -2048
14+
15+
## PCALAU12I contains a nonzero addend, no relaxations should be applied.
16+
# CHECK-NEXT: pcalau12i $a1, 2
17+
# CHECK-NEXT: ld.d $a1, $a1, -2048
18+
19+
## LD contains a nonzero addend, no relaxations should be applied.
20+
# CHECK-NEXT: pcalau12i $a2, 2
21+
# CHECK-NEXT: ld.d $a2, $a2, -2040
22+
23+
## PCALAU12I and LD use different registers, no relaxations should be applied.
24+
# CHECK-NEXT: pcalau12i $a3, 2
25+
# CHECK-NEXT: ld.d $a4, $a3, -2048
26+
27+
## PCALAU12I and LD use different registers, no relaxations should be applied.
28+
# CHECK-NEXT: pcalau12i $a5, 2
29+
# CHECK-NEXT: ld.d $a5, $a6, -2048
30+
31+
# RUN: ld.lld a.o -T underflow-range.t -o a-underflow
32+
# RUN: llvm-objdump -d --no-show-raw-insn a-underflow | FileCheck --check-prefix=OUTRANGE %s
33+
34+
# RUN: ld.lld a.o -T overflow-range.t -o a-overflow
35+
# RUN: llvm-objdump -d --no-show-raw-insn a-overflow | FileCheck --check-prefix=OUTRANGE %s
36+
37+
# OUTRANGE: pcalau12i $a0, 1
38+
# OUTRANGE-NEXT: ld.d $a0, $a0, 0
39+
40+
## Relocations do not appear in pairs, no relaxations should be applied.
41+
# RUN: ld.lld unpaired.o -T within-range.t -o unpaired
42+
# RUN: llvm-objdump --no-show-raw-insn -d unpaired | FileCheck --check-prefix=UNPAIRED %s
43+
44+
# UNPAIRED: pcalau12i $a0, 2
45+
# UNPAIRED-NEXT: b 8
46+
# UNPAIRED-NEXT: pcalau12i $a0, 2
47+
# UNPAIRED: ld.d $a0, $a0, -2048
48+
49+
## Relocations do not appear in pairs, no relaxations should be applied.
50+
# RUN: ld.lld lone-ldr.o -T within-range.t -o lone-ldr
51+
# RUN: llvm-objdump --no-show-raw-insn -d lone-ldr | FileCheck --check-prefix=LONE-LDR %s
52+
53+
# LONE-LDR: ld.d $a0, $a0, -2048
54+
55+
## 32-bit code is mostly the same. We only test a few variants.
56+
# RUN: llvm-mc --filetype=obj --triple=loongarch32 a.32.s -o a.32.o
57+
# RUN: ld.lld a.32.o -T within-range.t -o a32
58+
# RUN: llvm-objdump -d --no-show-raw-insn a32 | FileCheck --check-prefix=CHECK32 %s
59+
60+
## This test verifies the encoding when the register $a0 is used.
61+
# CHECK32: pcalau12i $a0, 0
62+
# CHECK32-NEXT: addi.w $a0, $a0, -2048
63+
64+
65+
## This linker script ensures that .rodata and .text are sufficiently close to
66+
## each other so that the pcalau12i + ld pair can be relaxed to pcalau12i + addi.
67+
#--- within-range.t
68+
SECTIONS {
69+
.rodata 0x1800: { *(.rodata) }
70+
.text 0x2800: { *(.text) }
71+
.got 0x3800: { *(.got) }
72+
}
73+
74+
## This linker script ensures that .rodata and .text are sufficiently far apart
75+
## so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + addi.
76+
#--- underflow-range.t
77+
SECTIONS {
78+
.rodata 0x800-4: { *(.rodata) }
79+
.got 0x80002000: { *(.got) }
80+
.text 0x80001000: { *(.text) } /* (0x800-4)+2GB+0x800+4 */
81+
}
82+
83+
#--- overflow-range.t
84+
SECTIONS {
85+
.text 0x1000: { *(.text) }
86+
.got 0x2000: { *(.got) }
87+
.rodata 0x80000800 : { *(.rodata) } /* 0x1000+2GB-0x800 */
88+
}
89+
90+
#--- a.s
91+
## Symbol 'x' is nonpreemptible, the optimization should be applied.
92+
.rodata
93+
.hidden x
94+
x:
95+
.word 10
96+
97+
.text
98+
.global _start
99+
_start:
100+
pcalau12i $a0, %got_pc_hi20(x)
101+
ld.d $a0, $a0, %got_pc_lo12(x)
102+
pcalau12i $a1, %got_pc_hi20(x+1)
103+
ld.d $a1, $a1, %got_pc_lo12(x)
104+
pcalau12i $a2, %got_pc_hi20(x)
105+
ld.d $a2, $a2, %got_pc_lo12(x+8)
106+
pcalau12i $a3, %got_pc_hi20(x)
107+
ld.d $a4, $a3, %got_pc_lo12(x)
108+
pcalau12i $a5, %got_pc_hi20(x)
109+
ld.d $a5, $a6, %got_pc_lo12(x)
110+
111+
#--- unpaired.s
112+
.text
113+
.hidden x
114+
x:
115+
nop
116+
.global _start
117+
_start:
118+
pcalau12i $a0, %got_pc_hi20(x)
119+
b L
120+
pcalau12i $a0, %got_pc_hi20(x)
121+
L:
122+
ld.d $a0, $a0, %got_pc_lo12(x)
123+
124+
#--- lone-ldr.s
125+
.text
126+
.hidden x
127+
x:
128+
nop
129+
.global _start
130+
_start:
131+
ld.d $a0, $a0, %got_pc_lo12(x)
132+
133+
134+
#--- a.32.s
135+
## Symbol 'x' is nonpreemptible, the optimization should be applied.
136+
.rodata
137+
.hidden x
138+
x:
139+
.word 10
140+
141+
.text
142+
.global _start
143+
_start:
144+
pcalau12i $a0, %got_pc_hi20(x)
145+
ld.w $a0, $a0, %got_pc_lo12(x)

lld/test/ELF/loongarch-relax-pc-hi20-lo12.s

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,24 +31,26 @@
3131
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
3232
# NORELAX32-NEXT: 10000: pcalau12i $a0, 1024
3333
# NORELAX32-NEXT: addi.w $a0, $a0, 0
34+
## Not relaxation, conversion to PCRel.
3435
# NORELAX32-NEXT: pcalau12i $a0, 1024
35-
# NORELAX32-NEXT: ld.w $a0, $a0, 4
36+
# NORELAX32-NEXT: addi.w $a0, $a0, 0
3637
# NORELAX32-NEXT: pcalau12i $a0, 1024
3738
# NORELAX32-NEXT: addi.w $a0, $a0, 0
3839
# NORELAX32-NEXT: pcalau12i $a0, 1024
39-
# NORELAX32-NEXT: ld.w $a0, $a0, 4
40+
# NORELAX32-NEXT: addi.w $a0, $a0, 0
4041

4142
# NORELAX64-LABEL: <_start>:
4243
## offset exceed range of pcaddi
4344
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
4445
# NORELAX64-NEXT: 10000: pcalau12i $a0, 1024
4546
# NORELAX64-NEXT: addi.d $a0, $a0, 0
47+
## Not relaxation, conversion to PCRel.
4648
# NORELAX64-NEXT: pcalau12i $a0, 1024
47-
# NORELAX64-NEXT: ld.d $a0, $a0, 8
49+
# NORELAX64-NEXT: addi.d $a0, $a0, 0
4850
# NORELAX64-NEXT: pcalau12i $a0, 1024
4951
# NORELAX64-NEXT: addi.d $a0, $a0, 0
5052
# NORELAX64-NEXT: pcalau12i $a0, 1024
51-
# NORELAX64-NEXT: ld.d $a0, $a0, 8
53+
# NORELAX64-NEXT: addi.d $a0, $a0, 0
5254

5355

5456
## GOT references with non-zero addends. No relaxation.

lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
class ExprDefinitionInDylibTestCase(TestBase):
88
NO_DEBUG_INFO_TESTCASE = True
99

10+
@skipIfWindows
1011
def test(self):
1112
"""
1213
Tests that we can call functions whose definition

0 commit comments

Comments
 (0)