Commit d68aec8

runtime: replace trace seqlock with write flag

The runtime tracer currently uses a per-M seqlock to indicate whether a thread is writing to a local trace buffer. The seqlock is updated with two atomic adds, which are read-modify-write operations. These are quite expensive, even though they're completely uncontended. We can make these operations slightly cheaper by using an atomic store.

The key insight here is that only one thread ever writes to the value at a time, so only the "write" part of the read-modify-write actually matters. At that point, it doesn't really matter that we have a monotonically increasing counter. This is made clearer by the fact that nothing beyond basic sanity checks ensures the counter is monotonically increasing: everything depends only on whether the counter is even or odd. All we really need, then, is a flag: an atomic.Bool, which we can update with an atomic Store, a write-only instruction.

Change-Id: I0cfe39b34c7634554c34c53c0f0e196d125bbc4a
Reviewed-on: https://go-review.googlesource.com/c/go/+/721840
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent 8d9906c commit d68aec8
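The core of the change is easiest to see side by side. Below is a minimal sketch, with illustrative types only (not the runtime's actual code), of both schemes: the seqlock pays for an atomic read-modify-write (Add) on entry and exit, while the flag needs only a write-only Store, because a single thread owns the value.

```go
package sketch

import "sync/atomic"

// Before: a seqlock-style counter. Entering and leaving the write
// section each cost an atomic Add, a read-modify-write the CPU must
// serialize even when uncontended. Only the parity is ever used.
type seqWriter struct{ seq atomic.Uintptr }

func (w *seqWriter) begin() { w.seq.Add(1) } // odd: writing
func (w *seqWriter) end()   { w.seq.Add(1) } // even: idle

func (w *seqWriter) isWriting() bool { return w.seq.Load()%2 != 0 }

// After: a plain flag. Only this thread ever writes it, so a
// write-only atomic Store suffices; readers still just Load.
type flagWriter struct{ writing atomic.Bool }

func (w *flagWriter) begin() { w.writing.Store(true) }
func (w *flagWriter) end()   { w.writing.Store(false) }

func (w *flagWriter) isWriting() bool { return w.writing.Load() }
```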

File tree

3 files changed: +27 -38 lines

src/runtime/trace.go

Lines changed: 7 additions & 7 deletions
```diff
@@ -13,17 +13,17 @@
 // ## Design
 //
 // The basic idea behind the the execution tracer is to have per-M buffers that
-// trace data may be written into. Each M maintains a seqlock indicating whether
+// trace data may be written into. Each M maintains a write flag indicating whether
 // its trace buffer is currently in use.
 //
 // Tracing is initiated by StartTrace, and proceeds in "generations," with each
 // generation being marked by a call to traceAdvance, to advance to the next
 // generation. Generations are a global synchronization point for trace data,
 // and we proceed to a new generation by moving forward trace.gen. Each M reads
-// trace.gen under its own seqlock to determine which generation it is writing
+// trace.gen under its own write flag to determine which generation it is writing
 // trace data for. To this end, each M has 2 slots for buffers: one slot for the
 // previous generation, one slot for the current one. It uses tl.gen to select
-// which buffer slot to write to. Simultaneously, traceAdvance uses the seqlock
+// which buffer slot to write to. Simultaneously, traceAdvance uses the write flag
 // to determine whether every thread is guaranteed to observe an updated
 // trace.gen. Once it is sure, it may then flush any buffers that are left over
 // from the previous generation safely, since it knows the Ms will not mutate
@@ -43,7 +43,7 @@
 // appear in pairs: one for the previous generation, and one for the current one.
 // Like the per-M buffers, which of the two is written to is selected using trace.gen,
 // and anything managed this way must similarly be mutated only in traceAdvance or
-// under the M's seqlock.
+// under the M's write flag.
 //
 // Trace events themselves are simple. They consist of a single byte for the event type,
 // followed by zero or more LEB128-encoded unsigned varints. They are decoded using
@@ -629,7 +629,7 @@ func traceAdvance(stopTrace bool) {
 	// while they're still on that list. Removal from sched.freem is serialized with
 	// this snapshot, so either we'll capture an m on sched.freem and race with
 	// the removal to flush its buffers (resolved by traceThreadDestroy acquiring
-	// the thread's seqlock, which one of us must win, so at least its old gen buffer
+	// the thread's write flag, which one of us must win, so at least its old gen buffer
 	// will be flushed in time for the new generation) or it will have flushed its
 	// buffers before we snapshotted it to begin with.
 	lock(&sched.lock)
@@ -645,7 +645,7 @@ func traceAdvance(stopTrace bool) {
 
 	// Iterate over our snapshot, flushing every buffer until we're done.
 	//
-	// Because trace writers read the generation while the seqlock is
+	// Because trace writers read the generation while the write flag is
 	// held, we can be certain that when there are no writers there are
 	// also no stale generation values left. Therefore, it's safe to flush
 	// any buffers that remain in that generation's slot.
@@ -658,7 +658,7 @@ func traceAdvance(stopTrace bool) {
 	for mToFlush != nil {
 		prev := &mToFlush
 		for mp := *prev; mp != nil; {
-			if mp.trace.seqlock.Load()%2 != 0 {
+			if mp.trace.writing.Load() {
 				// The M is writing. Come back to it later.
 				prev = &mp.trace.link
 				mp = mp.trace.link
```
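For orientation, here is a minimal, self-contained sketch (hypothetical types, not the runtime's structs) of the generation double-buffering the design comment above describes: each M picks a buffer slot by generation parity while its write flag is set, and the advancing side waits for the flag to clear before flushing the previous generation's slot.

```go
package sketch

import "sync/atomic"

type traceBuf struct{ data []byte }

type mTrace struct {
	writing atomic.Bool
	buf     [2]*traceBuf // one slot per generation, indexed by gen%2
}

var gen atomic.Uint64 // stand-in for trace.gen

// write appends a byte to the slot for the current generation. The
// generation is loaded only after the write flag is set; that ordering
// is what lets the advancer reason about stale generation values.
func (t *mTrace) write(b byte) {
	t.writing.Store(true)
	g := gen.Load() // read under the write flag
	if t.buf[g%2] == nil {
		t.buf[g%2] = &traceBuf{}
	}
	t.buf[g%2].data = append(t.buf[g%2].data, b)
	t.writing.Store(false)
}

// advance bumps the generation, then takes the old slot once the M is
// observed not writing. The real traceAdvance revisits busy Ms on a
// list instead of spinning, but the ordering argument is the same.
func advance(t *mTrace) *traceBuf {
	old := gen.Add(1) - 1
	for t.writing.Load() {
		// The M may still be writing with a stale generation.
	}
	buf := t.buf[old%2]
	t.buf[old%2] = nil
	return buf
}
```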

src/runtime/tracecpu.go

Lines changed: 7 additions & 7 deletions
```diff
@@ -224,20 +224,20 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
 
 	// We're going to conditionally write to one of two buffers based on the
 	// generation. To make sure we write to the correct one, we need to make
-	// sure this thread's trace seqlock is held. If it already is, then we're
+	// sure this thread's trace write flag is set. If it already is, then we're
 	// in the tracer and we can just take advantage of that. If it isn't, then
 	// we need to acquire it and read the generation.
 	locked := false
-	if mp.trace.seqlock.Load()%2 == 0 {
-		mp.trace.seqlock.Add(1)
+	if !mp.trace.writing.Load() {
+		mp.trace.writing.Store(true)
 		locked = true
 	}
 	gen := trace.gen.Load()
 	if gen == 0 {
-		// Tracing is disabled, as it turns out. Release the seqlock if necessary
+		// Tracing is disabled, as it turns out. Clear the write flag if necessary
 		// and exit.
 		if locked {
-			mp.trace.seqlock.Add(1)
+			mp.trace.writing.Store(false)
 		}
 		return
 	}
@@ -275,8 +275,8 @@ func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
 
 	trace.signalLock.Store(0)
 
-	// Release the seqlock if we acquired it earlier.
+	// Clear the write flag if we set it earlier.
 	if locked {
-		mp.trace.seqlock.Add(1)
+		mp.trace.writing.Store(false)
 	}
 }
```
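The pattern above is worth isolating: a profiling signal can land while the thread is already inside a trace write section, so the handler only sets the flag when it is not already set, and only clears what it set. A minimal sketch with hypothetical names:

```go
package sketch

import "sync/atomic"

// sampleWithFlag mirrors traceCPUSample's conditional acquire. It is
// safe to test-then-set without a compare-and-swap because the flag is
// only ever written by its own M, and the signal handler interrupting
// that M runs on the same thread.
func sampleWithFlag(writing *atomic.Bool, emit func()) {
	locked := false
	if !writing.Load() {
		writing.Store(true) // we set it, so we must clear it
		locked = true
	}
	emit() // write the sample into the generation-selected buffer
	if locked {
		writing.Store(false)
	}
}
```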

src/runtime/traceruntime.go

Lines changed: 13 additions & 24 deletions
```diff
@@ -25,7 +25,7 @@ func (s *gTraceState) reset() {
 
 // mTraceState is per-M state for the tracer.
 type mTraceState struct {
-	seqlock   atomic.Uintptr                       // seqlock indicating that this M is writing to a trace buffer.
+	writing   atomic.Bool                          // flag indicating that this M is writing to a trace buffer.
 	buf       [2][tracev2.NumExperiments]*traceBuf // Per-M traceBuf for writing. Indexed by trace.gen%2.
 	link      *m                                   // Snapshot of alllink or freelink.
 	reentered uint32                               // Whether we've reentered tracing from within tracing.
@@ -211,21 +211,18 @@ func traceAcquireEnabled() traceLocker {
 	// Check if we're already tracing. It's safe to be reentrant in general,
 	// because this function (and the invariants of traceLocker.writer) ensure
 	// that it is.
-	if mp.trace.seqlock.Load()%2 == 1 {
+	if mp.trace.writing.Load() {
 		mp.trace.reentered++
 		return traceLocker{mp, mp.trace.entryGen}
 	}
 
-	// Acquire the trace seqlock. This prevents traceAdvance from moving forward
-	// until all Ms are observed to be outside of their seqlock critical section.
+	// Set the write flag. This prevents traceAdvance from moving forward
+	// until all Ms are observed to be outside of a write critical section.
 	//
-	// Note: The seqlock is mutated here and also in traceCPUSample. If you update
-	// usage of the seqlock here, make sure to also look at what traceCPUSample is
+	// Note: The write flag is mutated here and also in traceCPUSample. If you update
+	// usage of the write flag here, make sure to also look at what traceCPUSample is
 	// doing.
-	seq := mp.trace.seqlock.Add(1)
-	if debugTraceReentrancy && seq%2 != 1 {
-		throw("bad use of trace.seqlock")
-	}
+	mp.trace.writing.Store(true)
 
 	// N.B. This load of gen appears redundant with the one in traceEnabled.
 	// However, it's very important that the gen we use for writing to the trace
@@ -237,7 +234,7 @@ func traceAcquireEnabled() traceLocker {
 	// what we did and bail.
 	gen := trace.gen.Load()
 	if gen == 0 {
-		mp.trace.seqlock.Add(1)
+		mp.trace.writing.Store(false)
 		releasem(mp)
 		return traceLocker{}
 	}
@@ -263,11 +260,7 @@ func traceRelease(tl traceLocker) {
 	if tl.mp.trace.reentered > 0 {
 		tl.mp.trace.reentered--
 	} else {
-		seq := tl.mp.trace.seqlock.Add(1)
-		if debugTraceReentrancy && seq%2 != 0 {
-			print("runtime: seq=", seq, "\n")
-			throw("bad use of trace.seqlock")
-		}
+		tl.mp.trace.writing.Store(false)
 	}
 	releasem(tl.mp)
 }
@@ -699,10 +692,10 @@ func traceThreadDestroy(mp *m) {
 	// Perform a traceAcquire/traceRelease on behalf of mp to
 	// synchronize with the tracer trying to flush our buffer
 	// as well.
-	seq := mp.trace.seqlock.Add(1)
-	if debugTraceReentrancy && seq%2 != 1 {
-		throw("bad use of trace.seqlock")
+	if debugTraceReentrancy && mp.trace.writing.Load() {
+		throw("bad use of trace.writing")
 	}
+	mp.trace.writing.Store(true)
 	systemstack(func() {
 		lock(&trace.lock)
 		for i := range mp.trace.buf {
@@ -717,9 +710,5 @@ func traceThreadDestroy(mp *m) {
 		}
 		unlock(&trace.lock)
 	})
-	seq1 := mp.trace.seqlock.Add(1)
-	if seq1 != seq+1 {
-		print("runtime: seq1=", seq1, "\n")
-		throw("bad use of trace.seqlock")
-	}
+	mp.trace.writing.Store(false)
 }
```
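Finally, a minimal sketch (hypothetical names, not the runtime's code) of the reentrancy scheme traceAcquireEnabled and traceRelease implement above: only the outermost acquire stores the flag, nested acquires bump a plain counter, and only the matching outermost release clears it.

```go
package sketch

import "sync/atomic"

type mTraceState struct {
	writing   atomic.Bool
	reentered uint32 // nesting depth beyond the outermost acquire
}

func acquire(t *mTraceState) {
	if t.writing.Load() {
		t.reentered++ // already tracing on this M: just record nesting
		return
	}
	t.writing.Store(true) // outermost acquire sets the flag
}

func release(t *mTraceState) {
	if t.reentered > 0 {
		t.reentered-- // inner release: the flag stays set
		return
	}
	t.writing.Store(false) // outermost release clears it
}
```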
