Skip to content

Commit a13712e

Browse files
authored
[lldb] Fix a crash in lldb-server during RemoveSoftwareBreakpoint() (#148738)
# Lldb-server crash We have seen stacks like the following in lldb-server core dumps: ``` [ "__GI___pthread_kill at pthread_kill.c:46", "__GI_raise at raise.c:26", "__GI_abort at abort.c:100", "__assert_fail_base at assert.c:92", "__GI___assert_fail at assert.c:101", "lldb_private::NativeProcessProtocol::RemoveSoftwareBreakpoint(unsigned long) at /redacted/lldb-server:0" ] ``` # Hypothesis of root cause In `NativeProcessProtocol::RemoveSoftwareBreakpoint()` ([code](https://github.com/llvm/llvm-project/blob/19b2dd9d798c124406b0124a1b8debb711675281/lldb/source/Host/common/NativeProcessProtocol.cpp#L359-L423)), a `ref_count` is asserted and reduced. If it becomes zero, the code first go through a series of memory reads and writes to remove the breakpoint trap opcode and to restore the original process code, then, if everything goes fine, removes the entry from the map `m_software_breakpoints` at the end of the function. However, if any of the validations for the above reads and writes goes wrong, the code returns an error early, skipping the removal of the entry. This leaves the entry behind with a `ref_count` of zero. The next call to `NativeProcessProtocol::RemoveSoftwareBreakpoint()` for the same breakpoint[*] would violate the assertion about `ref_count > 0` ([here](https://github.com/llvm/llvm-project/blob/19b2dd9d798c124406b0124a1b8debb711675281/lldb/source/Host/common/NativeProcessProtocol.cpp#L365)), which would cause a crash. [*] We haven't found a *regular* way to repro such a next call in lldb or lldb-dap. This is because both of them remove the breakpoint from their internal list when they get any response from the lldb-server (OK or error). Asking the client to delete the breakpoint a second time doesn't trigger the client to send the `$z` gdb packet to lldb-server. We are able to trigger the crash by sending the `$z` packet directly, see "Manual test" below. # Fix Lift the removal of the map entry to be immediately after the decrement of `ref_count`, before the early returns. This ensures that the asserted case will never happen. The validation errors can still happen, and whether they happen or not, the breakpoint has been removed from the perspective of the lldb-server (same as that of lldb and lldb-dap). # Manual test & unit test See PR.
1 parent d9190f8 commit a13712e

File tree

2 files changed

+104
-7
lines changed

2 files changed

+104
-7
lines changed

lldb/source/Host/common/NativeProcessProtocol.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -366,12 +366,19 @@ Status NativeProcessProtocol::RemoveSoftwareBreakpoint(lldb::addr_t addr) {
366366
if (--it->second.ref_count > 0)
367367
return Status();
368368

369+
// Remove the entry from m_software_breakpoints rightaway, so that we don't
370+
// leave behind an entry with ref_count == 0 in case one of the following
371+
// conditions returns an error. The breakpoint is moved so that it can be
372+
// accessed below.
373+
SoftwareBreakpoint bkpt = std::move(it->second);
374+
m_software_breakpoints.erase(it);
375+
369376
// This is the last reference. Let's remove the breakpoint.
370377
Status error;
371378

372379
// Clear a software breakpoint instruction
373-
llvm::SmallVector<uint8_t, 4> curr_break_op(
374-
it->second.breakpoint_opcodes.size(), 0);
380+
llvm::SmallVector<uint8_t, 4> curr_break_op(bkpt.breakpoint_opcodes.size(),
381+
0);
375382

376383
// Read the breakpoint opcode
377384
size_t bytes_read = 0;
@@ -382,10 +389,10 @@ Status NativeProcessProtocol::RemoveSoftwareBreakpoint(lldb::addr_t addr) {
382389
"addr=0x%" PRIx64 ": tried to read %zu bytes but only read %zu", addr,
383390
curr_break_op.size(), bytes_read);
384391
}
385-
const auto &saved = it->second.saved_opcodes;
392+
const auto &saved = bkpt.saved_opcodes;
386393
// Make sure the breakpoint opcode exists at this address
387-
if (llvm::ArrayRef(curr_break_op) != it->second.breakpoint_opcodes) {
388-
if (curr_break_op != it->second.saved_opcodes)
394+
if (llvm::ArrayRef(curr_break_op) != bkpt.breakpoint_opcodes) {
395+
if (curr_break_op != bkpt.saved_opcodes)
389396
return Status::FromErrorString(
390397
"Original breakpoint trap is no longer in memory.");
391398
LLDB_LOG(log,
@@ -418,7 +425,6 @@ Status NativeProcessProtocol::RemoveSoftwareBreakpoint(lldb::addr_t addr) {
418425
llvm::make_range(saved.begin(), saved.end()));
419426
}
420427

421-
m_software_breakpoints.erase(it);
422428
return Status();
423429
}
424430

lldb/unittests/Host/NativeProcessProtocolTest.cpp

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,97 @@ TEST(NativeProcessProtocolTest, SetBreakpointFailVerify) {
7373
llvm::Failed());
7474
}
7575

76+
TEST(NativeProcessProtocolTest, RemoveSoftwareBreakpoint) {
77+
NiceMock<MockDelegate> DummyDelegate;
78+
MockProcess<NativeProcessProtocol> Process(DummyDelegate,
79+
ArchSpec("x86_64-pc-linux"));
80+
auto Trap = cantFail(Process.GetSoftwareBreakpointTrapOpcode(1));
81+
auto Original = std::vector<uint8_t>{0xbb};
82+
83+
// Set up a breakpoint.
84+
{
85+
InSequence S;
86+
EXPECT_CALL(Process, ReadMemory(0x47, 1))
87+
.WillOnce(Return(ByMove(Original)));
88+
EXPECT_CALL(Process, WriteMemory(0x47, Trap)).WillOnce(Return(ByMove(1)));
89+
EXPECT_CALL(Process, ReadMemory(0x47, 1)).WillOnce(Return(ByMove(Trap)));
90+
EXPECT_THAT_ERROR(Process.SetBreakpoint(0x47, 0, false).ToError(),
91+
llvm::Succeeded());
92+
}
93+
94+
// Remove the breakpoint for the first time. This should remove the breakpoint
95+
// from m_software_breakpoints.
96+
//
97+
// Should succeed.
98+
{
99+
InSequence S;
100+
EXPECT_CALL(Process, ReadMemory(0x47, 1)).WillOnce(Return(ByMove(Trap)));
101+
EXPECT_CALL(Process, WriteMemory(0x47, llvm::ArrayRef(Original)))
102+
.WillOnce(Return(ByMove(1)));
103+
EXPECT_CALL(Process, ReadMemory(0x47, 1))
104+
.WillOnce(Return(ByMove(Original)));
105+
EXPECT_THAT_ERROR(Process.RemoveBreakpoint(0x47, false).ToError(),
106+
llvm::Succeeded());
107+
}
108+
109+
// Remove the breakpoint for the second time.
110+
//
111+
// Should fail. None of the ReadMemory() or WriteMemory() should be called,
112+
// because the function should early return when seeing that the breakpoint
113+
// isn't in m_software_breakpoints.
114+
{
115+
EXPECT_CALL(Process, ReadMemory(_, _)).Times(0);
116+
EXPECT_CALL(Process, WriteMemory(_, _)).Times(0);
117+
EXPECT_THAT_ERROR(Process.RemoveBreakpoint(0x47, false).ToError(),
118+
llvm::Failed());
119+
}
120+
}
121+
122+
TEST(NativeProcessProtocolTest, RemoveSoftwareBreakpointMemoryError) {
123+
NiceMock<MockDelegate> DummyDelegate;
124+
MockProcess<NativeProcessProtocol> Process(DummyDelegate,
125+
ArchSpec("x86_64-pc-linux"));
126+
auto Trap = cantFail(Process.GetSoftwareBreakpointTrapOpcode(1));
127+
auto Original = std::vector<uint8_t>{0xbb};
128+
auto SomethingElse = std::vector<uint8_t>{0xaa};
129+
130+
// Set up a breakpoint.
131+
{
132+
InSequence S;
133+
EXPECT_CALL(Process, ReadMemory(0x47, 1))
134+
.WillOnce(Return(ByMove(Original)));
135+
EXPECT_CALL(Process, WriteMemory(0x47, Trap)).WillOnce(Return(ByMove(1)));
136+
EXPECT_CALL(Process, ReadMemory(0x47, 1)).WillOnce(Return(ByMove(Trap)));
137+
EXPECT_THAT_ERROR(Process.SetBreakpoint(0x47, 0, false).ToError(),
138+
llvm::Succeeded());
139+
}
140+
141+
// Remove the breakpoint for the first time, with an unexpected value read by
142+
// the first ReadMemory(). This should cause an early return, with the
143+
// breakpoint removed from m_software_breakpoints.
144+
//
145+
// Should fail.
146+
{
147+
InSequence S;
148+
EXPECT_CALL(Process, ReadMemory(0x47, 1))
149+
.WillOnce(Return(ByMove(SomethingElse)));
150+
EXPECT_THAT_ERROR(Process.RemoveBreakpoint(0x47, false).ToError(),
151+
llvm::Failed());
152+
}
153+
154+
// Remove the breakpoint for the second time.
155+
//
156+
// Should fail. None of the ReadMemory() or WriteMemory() should be called,
157+
// because the function should early return when seeing that the breakpoint
158+
// isn't in m_software_breakpoints.
159+
{
160+
EXPECT_CALL(Process, ReadMemory(_, _)).Times(0);
161+
EXPECT_CALL(Process, WriteMemory(_, _)).Times(0);
162+
EXPECT_THAT_ERROR(Process.RemoveBreakpoint(0x47, false).ToError(),
163+
llvm::Failed());
164+
}
165+
}
166+
76167
TEST(NativeProcessProtocolTest, ReadMemoryWithoutTrap) {
77168
NiceMock<MockDelegate> DummyDelegate;
78169
MockProcess<NativeProcessProtocol> Process(DummyDelegate,
@@ -146,4 +237,4 @@ TEST(NativeProcessProtocolTest, ReadCStringFromMemory_CrossPageBoundary) {
146237
bytes_read),
147238
llvm::HasValue(llvm::StringRef("hello")));
148239
EXPECT_EQ(bytes_read, 6UL);
149-
}
240+
}

0 commit comments

Comments
 (0)