Commit d68aec8

runtime: replace trace seqlock with write flag

The runtime tracer currently uses a per-M seqlock to indicate whether a thread is writing to a local trace buffer. The seqlock is updated with two atomic adds, which are read-modify-write operations. These are quite expensive, even though they're completely uncontended. We can make these operations slightly cheaper by using an atomic store.

The key insight here is that only one thread ever writes to the value at a time, so only the "write" part of the read-modify-write actually matters. At that point, it doesn't really matter that we have a monotonically increasing counter. This is made clearer by the fact that nothing beyond basic sanity checks ensures the counter is monotonically increasing: everything depends only on whether the counter is even or odd. All we really need, then, is a flag: an atomic.Bool, which we can update with an atomic Store, a write-only instruction.

Change-Id: I0cfe39b34c7634554c34c53c0f0e196d125bbc4a
Reviewed-on: https://go-review.googlesource.com/c/go/+/721840
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent 8d9906c commit d68aec8
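The core of the change is easiest to see side by side. Below is a minimal sketch, with illustrative types only (not the runtime's actual code), of both schemes: the seqlock pays for an atomic read-modify-write (Add) on entry and exit, while the flag needs only a write-only Store, because a single thread owns the value.

```go
package sketch

import "sync/atomic"

// Before: a seqlock-style counter. Entering and leaving the write
// section each cost an atomic Add, a read-modify-write the CPU must
// serialize even when uncontended. Only the parity is ever used.
type seqWriter struct{ seq atomic.Uintptr }

func (w *seqWriter) begin() { w.seq.Add(1) } // odd: writing
func (w *seqWriter) end()   { w.seq.Add(1) } // even: idle

func (w *seqWriter) isWriting() bool { return w.seq.Load()%2 != 0 }

// After: a plain flag. Only this thread ever writes it, so a
// write-only atomic Store suffices; readers still just Load.
type flagWriter struct{ writing atomic.Bool }

func (w *flagWriter) begin() { w.writing.Store(true) }
func (w *flagWriter) end()   { w.writing.Store(false) }

func (w *flagWriter) isWriting() bool { return w.writing.Load() }
```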

File tree

3 files changed: +27 -38 lines

src/runtime/trace.go

Lines changed: 7 additions & 7 deletions
```diff
@@ -13,17 +13,17 @@
 // ## Design
 //
 // The basic idea behind the the execution tracer is to have per-M buffers that
-// trace data may be written into. Each M maintains a seqlock indicating whether
+// trace data may be written into. Each M maintains a write flag indicating whether
 // its trace buffer is currently in use.
 //
 // Tracing is initiated by StartTrace, and proceeds in "generations," with each
 // generation being marked by a call to traceAdvance, to advance to the next
 // generation. Generations are a global synchronization point for trace data,
 // and we proceed to a new generation by moving forward trace.gen. Each M reads
-// trace.gen under its own seqlock to determine which generation it is writing
+// trace.gen under its own write flag to determine which generation it is writing
 // trace data for. To this end, each M has 2 slots for buffers: one slot for the
 // previous generation, one slot for the current one. It uses tl.gen to select
-// which buffer slot to write to. Simultaneously, traceAdvance uses the seqlock
+// which buffer slot to write to. Simultaneously, traceAdvance uses the write flag
 // to determine whether every thread is guaranteed to observe an updated
 // trace.gen. Once it is sure, it may then flush any buffers that are left over
 // from the previous generation safely, since it knows the Ms will not mutate
@@ -43,7 +43,7 @@
 // appear in pairs: one for the previous generation, and one for the current one.
 // Like the per-M buffers, which of the two is written to is selected using trace.gen,
 // and anything managed this way must similarly be mutated only in traceAdvance or
-// under the M's seqlock.
+// under the M's write flag.
 //
 // Trace events themselves are simple. They consist of a single byte for the event type,
 // followed by zero or more LEB128-encoded unsigned varints. They are decoded using
@@ -629,7 +629,7 @@ func traceAdvance(stopTrace bool) {
 	// while they're still on that list. Removal from sched.freem is serialized with
 	// this snapshot, so either we'll capture an m on sched.freem and race with
 	// the removal to flush its buffers (resolved by traceThreadDestroy acquiring
-	// the thread's seqlock, which one of us must win, so at least its old gen buffer
+	// the thread's write flag, which one of us must win, so at least its old gen buffer
 	// will be flushed in time for the new generation) or it will have flushed its
 	// buffers before we snapshotted it to begin with.
 	lock(&sched.lock)
@@ -645,7 +645,7 @@ func traceAdvance(stopTrace bool) {
 
 	// Iterate over our snapshot, flushing every buffer until we're done.
 	//
-	// Because trace writers read the generation while the seqlock is
+	// Because trace writers read the generation while the write flag is
 	// held, we can be certain that when there are no writers there are
 	// also no stale generation values left. Therefore, it's safe to flush
 	// any buffers that remain in that generation's slot.
@@ -658,7 +658,7 @@ func traceAdvance(stopTrace bool) {
 	for mToFlush != nil {
 		prev := &mToFlush
 		for mp := *prev; mp != nil; {
-			if mp.trace.seqlock.Load()%2 != 0 {
+			if mp.trace.writing.Load() {
 				// The M is writing. Come back to it later.
 				prev = &mp.trace.link
 				mp = mp.trace.link
```
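For orientation, here is a minimal, self-contained sketch (hypothetical types, not the runtime's structs) of the generation double-buffering the design comment above describes: each M picks a buffer slot by generation parity while its write flag is set, and the advancing side waits for the flag to clear before flushing the previous generation's slot.

```go
package sketch

import "sync/atomic"

type traceBuf struct{ data []byte }

type mTrace struct {
	writing atomic.Bool
	buf     [2]*traceBuf // one slot per generation, indexed by gen%2
}

var gen atomic.Uint64 // stand-in for trace.gen

// write appends a byte to the slot for the current generation. The
// generation is loaded only after the write flag is set; that ordering
// is what lets the advancer reason about stale generation values.
func (t *mTrace) write(b byte) {
	t.writing.Store(true)
	g := gen.Load() // read under the write flag
	if t.buf[g%2] == nil {
		t.buf[g%2] = &traceBuf{}
	}
	t.buf[g%2].data = append(t.buf[g%2].data, b)
	t.writing.Store(false)
}

// advance bumps the generation, then takes the old slot once the M is
// observed not writing. The real traceAdvance revisits busy Ms on a
// list instead of spinning, but the ordering argument is the same.
func advance(t *mTrace) *traceBuf {
	old := gen.Add(1) - 1
	for t.writing.Load() {
		// The M may still be writing with a stale generation.
	}
	buf := t.buf[old%2]
	t.buf[old%2] = nil
	return buf
}
```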

src/runtime/tracecpu.go

Lines changed: 7 additions & 7 deletions
```diff
@@ -224,20 +224,20 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
 
 	// We're going to conditionally write to one of two buffers based on the
 	// generation. To make sure we write to the correct one, we need to make
-	// sure this thread's trace seqlock is held. If it already is, then we're
+	// sure this thread's trace write flag is set. If it already is, then we're
 	// in the tracer and we can just take advantage of that. If it isn't, then
 	// we need to acquire it and read the generation.
 	locked := false
-	if mp.trace.seqlock.Load()%2 == 0 {
-		mp.trace.seqlock.Add(1)
+	if !mp.trace.writing.Load() {
+		mp.trace.writing.Store(true)
 		locked = true
 	}
 	gen := trace.gen.Load()
 	if gen == 0 {
-		// Tracing is disabled, as it turns out. Release the seqlock if necessary
+		// Tracing is disabled, as it turns out. Clear the write flag if necessary
 		// and exit.
 		if locked {
-			mp.trace.seqlock.Add(1)
+			mp.trace.writing.Store(false)
 		}
 		return
 	}
@@ -275,8 +275,8 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
 
 	trace.signalLock.Store(0)
 
-	// Release the seqlock if we acquired it earlier.
+	// Clear the write flag if we set it earlier.
 	if locked {
-		mp.trace.seqlock.Add(1)
+		mp.trace.writing.Store(false)
 	}
 }
```
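The pattern above is worth isolating: a profiling signal can land while the thread is already inside a trace write section, so the handler only sets the flag when it is not already set, and only clears what it set. A minimal sketch with hypothetical names:

```go
package sketch

import "sync/atomic"

// sampleWithFlag mirrors traceCPUSample's conditional acquire. It is
// safe to test-then-set without a compare-and-swap because the flag is
// only ever written by its own M, and the signal handler interrupting
// that M runs on the same thread.
func sampleWithFlag(writing *atomic.Bool, emit func()) {
	locked := false
	if !writing.Load() {
		writing.Store(true) // we set it, so we must clear it
		locked = true
	}
	emit() // write the sample into the generation-selected buffer
	if locked {
		writing.Store(false)
	}
}
```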

src/runtime/traceruntime.go

Lines changed: 13 additions & 24 deletions
```diff
@@ -25,7 +25,7 @@ func (s *gTraceState) reset() {
 
 // mTraceState is per-M state for the tracer.
 type mTraceState struct {
-	seqlock   atomic.Uintptr                       // seqlock indicating that this M is writing to a trace buffer.
+	writing   atomic.Bool                          // flag indicating that this M is writing to a trace buffer.
 	buf       [2][tracev2.NumExperiments]*traceBuf // Per-M traceBuf for writing. Indexed by trace.gen%2.
 	link      *m                                   // Snapshot of alllink or freelink.
 	reentered uint32                               // Whether we've reentered tracing from within tracing.
@@ -211,21 +211,18 @@ func traceAcquireEnabled() traceLocker {
 	// Check if we're already tracing. It's safe to be reentrant in general,
 	// because this function (and the invariants of traceLocker.writer) ensure
 	// that it is.
-	if mp.trace.seqlock.Load()%2 == 1 {
+	if mp.trace.writing.Load() {
 		mp.trace.reentered++
 		return traceLocker{mp, mp.trace.entryGen}
 	}
 
-	// Acquire the trace seqlock. This prevents traceAdvance from moving forward
-	// until all Ms are observed to be outside of their seqlock critical section.
+	// Set the write flag. This prevents traceAdvance from moving forward
+	// until all Ms are observed to be outside of a write critical section.
 	//
-	// Note: The seqlock is mutated here and also in traceCPUSample. If you update
-	// usage of the seqlock here, make sure to also look at what traceCPUSample is
+	// Note: The write flag is mutated here and also in traceCPUSample. If you update
+	// usage of the write flag here, make sure to also look at what traceCPUSample is
 	// doing.
-	seq := mp.trace.seqlock.Add(1)
-	if debugTraceReentrancy && seq%2 != 1 {
-		throw("bad use of trace.seqlock")
-	}
+	mp.trace.writing.Store(true)
 
 	// N.B. This load of gen appears redundant with the one in traceEnabled.
 	// However, it's very important that the gen we use for writing to the trace
@@ -237,7 +234,7 @@ func traceAcquireEnabled() traceLocker {
 	// what we did and bail.
 	gen := trace.gen.Load()
 	if gen == 0 {
-		mp.trace.seqlock.Add(1)
+		mp.trace.writing.Store(false)
 		releasem(mp)
 		return traceLocker{}
 	}
@@ -263,11 +260,7 @@ func traceRelease(tl traceLocker) {
 	if tl.mp.trace.reentered > 0 {
 		tl.mp.trace.reentered--
 	} else {
-		seq := tl.mp.trace.seqlock.Add(1)
-		if debugTraceReentrancy && seq%2 != 0 {
-			print("runtime: seq=", seq, "\n")
-			throw("bad use of trace.seqlock")
-		}
+		tl.mp.trace.writing.Store(false)
 	}
 	releasem(tl.mp)
 }
@@ -699,10 +692,10 @@ func traceThreadDestroy(mp *m) {
 	// Perform a traceAcquire/traceRelease on behalf of mp to
 	// synchronize with the tracer trying to flush our buffer
 	// as well.
-	seq := mp.trace.seqlock.Add(1)
-	if debugTraceReentrancy && seq%2 != 1 {
-		throw("bad use of trace.seqlock")
+	if debugTraceReentrancy && mp.trace.writing.Load() {
+		throw("bad use of trace.writing")
 	}
+	mp.trace.writing.Store(true)
 	systemstack(func() {
 		lock(&trace.lock)
 		for i := range mp.trace.buf {
@@ -717,9 +710,5 @@ func traceThreadDestroy(mp *m) {
 		}
 		unlock(&trace.lock)
 	})
-	seq1 := mp.trace.seqlock.Add(1)
-	if seq1 != seq+1 {
-		print("runtime: seq1=", seq1, "\n")
-		throw("bad use of trace.seqlock")
-	}
+	mp.trace.writing.Store(false)
 }
```
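Finally, a minimal sketch (hypothetical names, not the runtime's code) of the reentrancy scheme traceAcquireEnabled and traceRelease implement above: only the outermost acquire stores the flag, nested acquires bump a plain counter, and only the matching outermost release clears it.

```go
package sketch

import "sync/atomic"

type mTraceState struct {
	writing   atomic.Bool
	reentered uint32 // nesting depth beyond the outermost acquire
}

func acquire(t *mTraceState) {
	if t.writing.Load() {
		t.reentered++ // already tracing on this M: just record nesting
		return
	}
	t.writing.Store(true) // outermost acquire sets the flag
}

func release(t *mTraceState) {
	if t.reentered > 0 {
		t.reentered-- // inner release: the flag stays set
		return
	}
	t.writing.Store(false) // outermost release clears it
}
```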
