Skip to content

Commit 362a6f6

Browse files
yjndinyy
authored and committed
[BOLT][RISCV]fix up GOT Relocation Handling
1 parent a102342 commit 362a6f6

File tree

5 files changed

+199
-34
lines changed

5 files changed

+199
-34
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,12 @@ class MCPlusBuilder {
192192

193193
SrcInst.erase(AnnotationOp, SrcInst.end());
194194
}
195+
/// Copy annotations from \p SrcInst to \p DstInst.
196+
void copyAnnotations (MCInst &SrcInst, MCInst &DstInst) const {
197+
MCInst::iterator AnnotationOp = getAnnotationInstOp(SrcInst);
198+
for (MCInst::iterator Iter = AnnotationOp; Iter != SrcInst.end(); ++Iter)
199+
DstInst.addOperand(*Iter);
200+
}
195201

196202
/// Return iterator range covering def operands.
197203
iterator_range<MCInst::iterator> defOperands(MCInst &Inst) const {
@@ -839,6 +845,29 @@ class MCPlusBuilder {
839845
return StringRef();
840846
}
841847

848+
/// Returns the base register used by the instruction.
849+
virtual unsigned getBaseReg(const MCInst &Inst) const{
850+
llvm_unreachable("not implemented");
851+
return 0;
852+
}
853+
854+
/// Matches a pair of instructions that implement a GOT load:
855+
/// an AUIPC (loading the high part of the address)
856+
/// followed by a GOT-loading instruction (loading the low part of the address).
857+
virtual bool matchGotAuipcPair(const MCInst &Inst) const{
858+
llvm_unreachable("not implemented");
859+
return false;
860+
}
861+
862+
/// Try to find a symbol referenced by a PC-relative LO (low 12 bits)
863+
// relocation in the instruction.
864+
virtual const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const{
865+
llvm_unreachable("not implemented");
866+
return nullptr;
867+
}
868+
869+
870+
842871
/// Interface and basic functionality of a MCInstMatcher. The idea is to make
843872
/// it easy to match one or more MCInsts against a tree-like pattern and
844873
/// extract the fragment operands. Example:
@@ -2303,7 +2332,7 @@ class MCPlusBuilder {
23032332
// We have to use at least 2-byte alignment for functions because of C++
23042333
// ABI.
23052334
return 2;
2306-
}
2335+
}
23072336

23082337
// AliasMap caches a mapping of registers to the set of registers that
23092338
// alias (are sub or superregs of itself, including itself).

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,12 +1457,42 @@ Error BinaryFunction::disassemble() {
14571457
if (BC.isAArch64())
14581458
handleAArch64IndirectCall(Instruction, Offset);
14591459
}
1460-
} else if (BC.isRISCV()) {
1461-
// Check if there's a relocation associated with this instruction.
1462-
for (auto Itr = Relocations.lower_bound(Offset),
1463-
ItrE = Relocations.lower_bound(Offset + Size);
1460+
}
1461+
1462+
add_instruction:
1463+
if (getDWARFLineTable()) {
1464+
Instruction.setLoc(findDebugLineInformationForInstructionAt(
1465+
AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
1466+
}
1467+
1468+
// Record offset of the instruction for profile matching.
1469+
if (BC.keepOffsetForInstruction(Instruction))
1470+
MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
1471+
1472+
if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
1473+
// NOTE: disassembly loses the correct size information for noops on x86.
1474+
// E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
1475+
// 5 bytes. Preserve the size info using annotations.
1476+
MIB->setSize(Instruction, Size);
1477+
}
1478+
1479+
addInstruction(Offset, std::move(Instruction));
1480+
}
1481+
if(BC.isRISCV()){
1482+
for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) {
1483+
uint64_t CurOffset = CurInstrIt->first;
1484+
if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue;
1485+
1486+
if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue;
1487+
if (MIB->isPseudo(CurInstrIt->second)) continue;
1488+
if (isZeroPaddingAt(CurInstrIt->first)) continue;
1489+
1490+
auto NextInstrIt = std::next(CurInstrIt);
1491+
uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize();
1492+
for (auto Itr = Relocations.lower_bound(CurOffset),
1493+
ItrE = Relocations.lower_bound(NextOffset);
14641494
Itr != ItrE; ++Itr) {
1465-
const Relocation &Relocation = Itr->second;
1495+
Relocation &Relocation = Itr->second;
14661496
MCSymbol *Symbol = Relocation.Symbol;
14671497

14681498
if (Relocation::isInstructionReference(Relocation.Type)) {
@@ -1484,35 +1514,51 @@ Error BinaryFunction::disassemble() {
14841514
if (Relocation::isGOT(Relocation.Type)) {
14851515
assert(Relocation::isPCRelative(Relocation.Type) &&
14861516
"GOT relocation must be PC-relative on RISC-V");
1517+
// For RISC-V, we need to find the next instruction
1518+
// that matches the current instruction's base register.
1519+
auto NextInstrIt = std::next(CurInstrIt);
1520+
unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second);
1521+
while (NextInstrIt != Instructions.end()) {
1522+
MCInst &NextInst = NextInstrIt->second;
1523+
unsigned NextReg = BC.MIB->getBaseReg(NextInst);
1524+
// In some cases there is an extra AUIPC instruction in between,
1525+
// e.g. an auipc + auipc + ld sequence, so we need to skip it.
1526+
if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) {
1527+
break;
1528+
}
1529+
if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){
1530+
1531+
int64_t CurImm = 0;
1532+
for (const MCOperand &Op : CurInstrIt->second) {
1533+
if (Op.isImm()) {
1534+
CurImm = Op.getImm();
1535+
break;
1536+
}
1537+
}
1538+
int64_t NextImm = 0;
1539+
for (const MCOperand &Op : NextInstrIt->second) {
1540+
if (Op.isImm()) {
1541+
NextImm = Op.getImm();
1542+
break;
1543+
}
1544+
}
1545+
Relocation.Value = (CurImm << 12) + NextImm;
1546+
break;
1547+
}
1548+
NextInstrIt = std::next(NextInstrIt);
1549+
}
14871550
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
14881551
Addend = Relocation.Value + Relocation.Offset + getAddress();
1489-
}
1490-
int64_t Value = Relocation.Value;
1491-
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1492-
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
1493-
(void)Result;
1494-
assert(Result && "cannot replace immediate with relocation");
1495-
}
1496-
}
14971552

1498-
add_instruction:
1499-
if (getDWARFLineTable()) {
1500-
Instruction.setLoc(findDebugLineInformationForInstructionAt(
1501-
AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
1502-
}
1503-
1504-
// Record offset of the instruction for profile matching.
1505-
if (BC.keepOffsetForInstruction(Instruction))
1506-
MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
1553+
}
1554+
int64_t Value = Relocation.Value;
1555+
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1556+
CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
1557+
(void)Result;
1558+
assert(Result && "cannot replace immediate with relocation");
15071559

1508-
if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
1509-
// NOTE: disassembly loses the correct size information for noops on x86.
1510-
// E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
1511-
// 5 bytes. Preserve the size info using annotations.
1512-
MIB->setSize(Instruction, Size);
1560+
}
15131561
}
1514-
1515-
addInstruction(Offset, std::move(Instruction));
15161562
}
15171563

15181564
for (auto [Offset, Label] : InstructionLabels) {

bolt/lib/Passes/FixRISCVCallsPass.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,50 @@ void FixRISCVCallsPass::runOnFunction(BinaryFunction &BF) {
2020
auto &BC = BF.getBinaryContext();
2121
auto &MIB = BC.MIB;
2222
auto *Ctx = BC.Ctx.get();
23+
// Since JitLink currently only supports adjacent AUIPC/LO12 pairs,
24+
// we must guarantee that the label for the AUIPC is
25+
// immediately before the LO12 instruction.
26+
for (auto &BB : BF) {
27+
for (auto II = BB.begin(); II != BB.end(); ) {
28+
const MCInst &AInst = *II;
29+
const MCSymbol *TargetSym = nullptr;
30+
if (MIB->isPseudo(*II)) {
31+
++II;
32+
continue;
33+
}
34+
TargetSym = MIB->getPCRelLoSymbol(AInst);
35+
if (TargetSym) {
36+
auto BI = II;
37+
bool foundSymbol = false;
38+
while(BI != BB.begin()){
39+
auto *Label = MIB->getInstLabel(*BI);
40+
if (Label && Label == TargetSym) {
41+
foundSymbol = true;
42+
break;
43+
}
44+
BI--;
45+
}
46+
if (foundSymbol && std::next(BI) != II) {
47+
MCInst SavedInst = *BI;
48+
MIB->copyAnnotations(*BI, SavedInst);
49+
BB.eraseInstruction(BI);
50+
II = BB.insertInstruction(--II, std::move(SavedInst));
51+
52+
// Verify that the label of the current instruction matches
53+
// the getPCRelLoSymbol of the next instruction
54+
auto NextII = std::next(II);
55+
if (NextII != BB.end()) {
56+
auto *CurrentLabel = MIB->getInstLabel(*II);
57+
auto *NextTargetSym = MIB->getPCRelLoSymbol(*NextII);
58+
assert(CurrentLabel && NextTargetSym && CurrentLabel == NextTargetSym &&
59+
"Label and target symbol mismatch after instruction reordering");
60+
}
61+
II++;
62+
}
63+
}
64+
++II;
65+
}
66+
}
2367

2468
for (auto &BB : BF) {
2569
for (auto II = BB.begin(); II != BB.end();) {

bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,42 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
339339
}
340340
}
341341

342+
unsigned getBaseReg(const MCInst &Inst) const override{
343+
switch (Inst.getOpcode()) {
344+
default:
345+
return 0;
346+
case RISCV::AUIPC:
347+
return Inst.getOperand(0).getReg();
348+
case RISCV::ADDI:
349+
case RISCV::LD:
350+
return Inst.getOperand(1).getReg();
351+
}
352+
}
353+
354+
bool matchGotAuipcPair(const MCInst &Inst) const override{
355+
return Inst.getOpcode() == RISCV::ADDI ||
356+
Inst.getOpcode() == RISCV::LD ||
357+
Inst.getOpcode() == RISCV::C_JAL ||
358+
Inst.getOpcode() == RISCV::C_BEQZ ||
359+
Inst.getOpcode() == RISCV::C_BNEZ;
360+
}
361+
362+
363+
/// Scan the operands of \p Inst in order and, for the first expression
/// operand that is an MCSpecifierExpr with the RISCV::S_PCREL_LO specifier,
/// return the result of getTargetSymbol() on its sub-expression.
/// Returns nullptr when no operand matches.
const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const override {
  for (const MCOperand &Operand : Inst) {
    // Only expression operands can carry a relocation specifier.
    if (!Operand.isExpr())
      continue;

    const auto *PCRelLo = llvm::dyn_cast<MCSpecifierExpr>(Operand.getExpr());
    if (PCRelLo && PCRelLo->getSpecifier() == RISCV::S_PCREL_LO)
      return getTargetSymbol(PCRelLo->getSubExpr());
  }
  return nullptr;
}
376+
377+
342378
const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
343379
auto *RISCVExpr = dyn_cast<MCSpecifierExpr>(Expr);
344380
if (RISCVExpr && RISCVExpr->getSubExpr())

bolt/test/RISCV/reloc-got.s

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang %cflags -o %t %s
2-
// RUN: llvm-bolt --print-cfg --print-only=_start -o %t.null %t \
2+
// RUN: llvm-bolt --print-finalized --print-only=_start -o %t.null %t \
33
// RUN: | FileCheck %s
44

55
.data
@@ -8,16 +8,26 @@
88
d:
99
.dword 0
1010

11+
.globl e
12+
.p2align 3
13+
e:
14+
.dword 0
15+
1116
.text
1217
.globl _start
1318
.p2align 1
1419
// CHECK: Binary Function "_start" after building cfg {
1520
_start:
1621
nop // Here to not make the _start and .Ltmp0 symbols coincide
17-
// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
18-
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
22+
// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
23+
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
24+
// CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1
25+
// CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1)
1926
1:
2027
auipc t0, %got_pcrel_hi(d)
28+
2:
29+
auipc t1, %got_pcrel_hi(e)
2130
ld t0, %pcrel_lo(1b)(t0)
31+
ld t1, %pcrel_lo(2b)(t1)
2232
ret
23-
.size _start, .-_start
33+
.size _start, .-_start

0 commit comments

Comments
 (0)