Skip to content

Commit 121bc3e

Browse files
committed
runtime/pprof: remove hard-coded sleep in CPU profile reader
The CPU profiler reader goroutine has a hard-coded 100ms sleep between reads of the CPU profile sample buffer. This is done because waking up the CPU profile reader is not signal-safe on some platforms. As a consequence, stopping the profiler takes 200ms (one iteration to read the last samples and one to see the "eof"), and on many-core systems the reader does not wake up frequently enought to keep up with incoming data. This CL removes the sleep where it is safe to do so, following a suggestion by Austin Clements in the comments on CL 445375. We let the reader fully block, and wake up the reader when the buffer is over half-full. Fixes #63043 Updates #56029 Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f Reviewed-on: https://go-review.googlesource.com/c/go/+/610815 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Michael Knyszek <[email protected]> Reviewed-by: Mark Freeman <[email protected]>
1 parent b604148 commit 121bc3e

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

src/runtime/pprof/pprof.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,10 @@ func profileWriter(w io.Writer) {
924924
b := newProfileBuilder(w)
925925
var err error
926926
for {
927-
time.Sleep(100 * time.Millisecond)
927+
if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
928+
// see runtime_pprof_readProfile
929+
time.Sleep(100 * time.Millisecond)
930+
}
928931
data, tags, eof := readProfile()
929932
if e := b.addCPUData(data, tags); e != nil && err == nil {
930933
err = e

src/runtime/profbuf.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ import (
3838
// be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
3939
// and rNext is only used by the reader, so it can be accessed without atomics.
4040
//
41+
// If the reader is blocked waiting for more data, the writer will wake it up if
42+
// either the buffer is more than half full, or when the writer sets the eof
43+
// marker or writes overflow entries (described below.)
44+
//
4145
// If the writer gets ahead of the reader, so that the buffer fills,
4246
// future writes are discarded and replaced in the output stream by an
4347
// overflow entry, which has size 2+hdrsize+1, time set to the time of
@@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u
378382
// Racing with reader setting flag bits in b.w, to avoid lost wakeups.
379383
old := b.w.load()
380384
new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
385+
// We re-load b.r here to reduce the likelihood of early wakeups
386+
// if the reader already consumed some data between the last
387+
// time we read b.r and now. This isn't strictly necessary.
388+
unread := countSub(new.dataCount(), b.r.load().dataCount())
389+
if unread < 0 {
390+
// The new count overflowed and wrapped around.
391+
unread += len(b.data)
392+
}
393+
wakeupThreshold := len(b.data) / 2
394+
if unread < wakeupThreshold {
395+
// Carry over the sleeping flag since we're not planning
396+
// to wake the reader yet
397+
new |= old & profReaderSleeping
398+
}
381399
if !b.w.cas(old, new) {
382400
continue
383401
}
384-
// If there was a reader, wake it up.
385-
if old&profReaderSleeping != 0 {
402+
// If we've hit our high watermark for data in the buffer,
403+
// and there is a reader, wake it up.
404+
if unread >= wakeupThreshold && old&profReaderSleeping != 0 {
405+
// NB: if we reach this point, then the sleeping bit is
406+
// cleared in the new b.w value
386407
notewakeup(&b.wait)
387408
}
388409
break

src/runtime/profbuf_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
package runtime_test
66

77
import (
8+
"fmt"
9+
"regexp"
10+
"runtime"
811
. "runtime"
912
"slices"
13+
"sync"
1014
"testing"
1115
"time"
1216
"unsafe"
@@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) {
190194
}
191195
}
192196
}
197+
198+
func TestProfBufWakeup(t *testing.T) {
199+
b := NewProfBuf(2, 16, 2)
200+
var wg sync.WaitGroup
201+
wg.Go(func() {
202+
read := 0
203+
for {
204+
rdata, _, eof := b.Read(ProfBufBlocking)
205+
if read == 0 && len(rdata) < 8 {
206+
t.Errorf("first wake up at less than half full, got %x", rdata)
207+
}
208+
read += len(rdata)
209+
if eof {
210+
return
211+
}
212+
}
213+
})
214+
215+
// Under the hood profBuf uses notetsleepg when the reader blocks.
216+
// Different platforms have different implementations, leading to
217+
// different statuses we need to look for to determine whether the
218+
// reader is blocked.
219+
var waitStatus string
220+
switch runtime.GOOS {
221+
case "js":
222+
waitStatus = "waiting"
223+
case "wasip1":
224+
waitStatus = "runnable"
225+
default:
226+
waitStatus = "syscall"
227+
}
228+
229+
// Ensure that the reader is blocked
230+
awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
231+
// NB: this writes 6 words not 4. Fine for the test.
232+
// The reader shouldn't wake up for this
233+
b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4})
234+
235+
// The reader should still be blocked
236+
//
237+
// TODO(nick): this is racy. We could Gosched and still have the reader
238+
// blocked in a buggy implementation because it just didn't get a chance
239+
// to run
240+
awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
241+
b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8})
242+
b.Close()
243+
244+
// Wait here so we can be sure the reader got the data
245+
wg.Wait()
246+
}
247+
248+
// see also runtime/pprof tests
249+
func awaitBlockedGoroutine(state, fName string) {
250+
re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName)
251+
r := regexp.MustCompile(re)
252+
253+
buf := make([]byte, 64<<10)
254+
for {
255+
Gosched()
256+
n := Stack(buf, true)
257+
if n == len(buf) {
258+
// Buffer wasn't large enough for a full goroutine dump.
259+
// Resize it and try again.
260+
buf = make([]byte, 2*len(buf))
261+
continue
262+
}
263+
const count = 1
264+
if len(r.FindAll(buf[:n], -1)) >= count {
265+
return
266+
}
267+
}
268+
}

0 commit comments

Comments
 (0)