From cf9423432da5b4abffb79dac14bbf099428fba06 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Tue, 15 Jul 2025 12:15:51 +0200 Subject: [PATCH 1/8] Proposal #74609: goroutine leak detection by using the garbage collector. --- .../goexperiment/exp_deadlockgc_off.go | 8 + .../goexperiment/exp_deadlockgc_on.go | 8 + src/internal/goexperiment/flags.go | 3 + src/runtime/export_test.go | 2 +- src/runtime/mgc.go | 308 +++++++++++++++++- src/runtime/mgcmark.go | 153 ++++++++- src/runtime/preempt.go | 3 +- src/runtime/proc.go | 14 +- src/runtime/runtime2.go | 19 ++ src/runtime/sema.go | 50 ++- src/runtime/traceback.go | 17 +- src/runtime/tracestatus.go | 2 +- 12 files changed, 542 insertions(+), 45 deletions(-) create mode 100644 src/internal/goexperiment/exp_deadlockgc_off.go create mode 100644 src/internal/goexperiment/exp_deadlockgc_on.go diff --git a/src/internal/goexperiment/exp_deadlockgc_off.go b/src/internal/goexperiment/exp_deadlockgc_off.go new file mode 100644 index 00000000000000..185171f4414c6c --- /dev/null +++ b/src/internal/goexperiment/exp_deadlockgc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.deadlockgc + +package goexperiment + +const DeadlockGC = false +const DeadlockGCInt = 0 diff --git a/src/internal/goexperiment/exp_deadlockgc_on.go b/src/internal/goexperiment/exp_deadlockgc_on.go new file mode 100644 index 00000000000000..5c2b07ca55653b --- /dev/null +++ b/src/internal/goexperiment/exp_deadlockgc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.deadlockgc + +package goexperiment + +const DeadlockGC = true +const DeadlockGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 63a338883991e0..969f975dd4c9e3 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -129,4 +129,7 @@ type Flags struct { // GreenTeaGC enables the Green Tea GC implementation. GreenTeaGC bool + + // DeadlockGC enables the Deadlock GC implementation. + DeadlockGC bool } diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 9a4611e26e52a2..14028db30efeac 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1208,7 +1208,7 @@ func (t *SemTable) Enqueue(addr *uint32) { s.releasetime = 0 s.acquiretime = 0 s.ticket = 0 - t.semTable.rootFor(addr).queue(addr, s, false) + t.semTable.rootFor(addr).queue(addr, s, false, false) } // Dequeue simulates dequeuing a waiter for a semaphore (or lock) at addr. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f2df1a00e0c683..b7105aa965f62a 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -373,17 +373,32 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // nStackRoots == len(stackRoots), but we have nStackRoots for - // consistency. - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int + // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots) + // during deadlock detection GC, nLiveStackRoots is the number of stackRoots + // to examine, and nStackRoots == len(stackRoots), which include goroutines that are + // unmarked / not runnable + nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nLiveStackRoots int + + // The GC has performed deadlock detection during this GC cycle. + detectedDeadlocks bool + + // Is set to true by DetectDeadlocks(), instructing the next GC cycle to perform deadlock detection. 
+ pendingDeadlockDetection bool + + // When set, the GC is running in deadlock detection mode. + // This can be triggered with a runtime flag. + deadlockDetectionMode bool // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 - // stackRoots is a snapshot of all of the Gs that existed - // before the beginning of concurrent marking. The backing - // store of this must not be modified because it might be - // shared with allgs. + // stackRoots is a snapshot of all of the Gs that existed before the + // beginning of concurrent marking. During deadlock detection GC, stackRoots + // is partitioned into two sets; to the left of nLiveStackRoots are stackRoots + // of running / runnable goroutines and to the right of nLiveStackRoots are + // stackRoots of unmarked / not runnable goroutines + // gcDiscoverMoreStackRoots modifies the stackRoots array to redo the partition + // after each marking phase stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -550,6 +565,29 @@ func GC() { releasem(mp) } +// DetectDeadlocks instructs the Go garbage collector to attempt +// partial deadlock detection. +// +// Only operates if deadlockgc is enabled in GOEXPERIMENT. +// Otherwise, it just runs runtime.GC(). +func DetectDeadlocks() { + if !goexperiment.DeadlockGC { + GC() + return + } + + // This write should be thread-safe, as the overwritten value is true. + // pendingDeadlockDetection is only set to false under STW at the start + // of the GC cycle that picks it up. + work.pendingDeadlockDetection = true + + // This read should be thread-safe for the same reason as the write above above. + // At most, we trigger the GC an additional time. + for work.pendingDeadlockDetection { + GC() + } +} + // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -695,6 +733,11 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode + } else if goexperiment.DeadlockGC { + if work.pendingDeadlockDetection { + // Fully stop the world if running deadlock detection. + mode = gcForceBlockMode + } } // Ok, we're doing it! Stop everybody else @@ -757,6 +800,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) + work.detectedDeadlocks = false // Assists and workers can start the moment we start // the world. @@ -788,6 +832,14 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) + if goexperiment.DeadlockGC { + if work.pendingDeadlockDetection { + // Write is thread-safe because the world is stopped + work.deadlockDetectionMode = true + work.pendingDeadlockDetection = false + } + } + gcBgMarkPrepare() // Must happen before assists are enabled. gcPrepareMarkRoots() @@ -888,6 +940,11 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) + if goexperiment.DeadlockGC { + if work.deadlockDetectionMode { + gcDiscoverMoreStackRoots() + } + } top: // Re-check transition condition under transition lock. @@ -947,8 +1004,7 @@ top: // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be // shaded. Transition to mark termination. 
- now := nanotime() - work.tMarkTerm = now + var now int64 getg().m.preemptoff = "gcing" var stw worldStop systemstack(func() { @@ -994,6 +1050,54 @@ top: }) semrelease(&worldsema) goto top + } else if goexperiment.DeadlockGC { + // Otherwise, do a deadlock detection round. + // Only do one deadlock detection round per GC cycle. + if work.deadlockDetectionMode && !work.detectedDeadlocks { + work.detectedDeadlocks = detectDeadlocks() + + getg().m.preemptoff = "" + systemstack(func() { + // Accumulate the time we were stopped before we had to start again. + work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + + now := startTheWorldWithSema(0, stw) + work.pauseNS += now - stw.startedStopping + }) + semrelease(&worldsema) + goto top + } + + now = nanotime() + work.tMarkTerm = now + // Check again whether any P needs to flush its write barrier + // to the GC work queue. + systemstack(func() { + for _, p := range allp { + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break + } + } + }) + + // If that is the case, restart again. Once restarts are no longer needed, + // run this without deadlock detection. + if restart { + gcDebugMarkDone.restartedDueTo27993 = true + + getg().m.preemptoff = "" + systemstack(func() { + // Accumulate the time we were stopped before we had to start again. + work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) + + now := startTheWorldWithSema(0, stw) + work.pauseNS += now - stw.startedStopping + }) + semrelease(&worldsema) + goto top + } } gcComputeStartingStackSize() @@ -1032,6 +1136,171 @@ top: gcMarkTermination(stw) } +// Check if an object is marked in the heap. +func checkIfMarked(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + // if we fall through to get here, we are within the stack ranges of reachable goroutines + return true +} + +// maybeLive checks whether a goroutine may still be semantically runnable. +// This returns true if the goroutine is waiting on at least one concurrency primitive +// which is reachable in memory, i.e., has been by the GC. +// +// For goroutines which are semantically runnable, this will eventually return true +// as the GC marking phase progresses. +func (gp *g) maybeLive() bool { + // Unmask the goroutine address to ensure we are not + // dereferencing a masked address. + gp = gp.unmask() + + switch gp.waitreason { + case waitReasonSelectNoCases, + waitReasonChanSendNilChan, + waitReasonChanReceiveNilChan: + // Select with no cases or communicating on nil channels + // make goroutines unrunnable by definition. + return false + case waitReasonChanReceive, + waitReasonSelect, + waitReasonChanSend: + // Cycle all through all *sudog to check whether + // the goroutine is waiting on a marked channel. + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + if checkIfMarked(unsafe.Pointer(sg.c)) { + return true + } + } + return false + case waitReasonSyncCondWait, + waitReasonSyncWaitGroupWait, + waitReasonSyncMutexLock, + waitReasonSyncRWMutexLock, + waitReasonSyncRWMutexRLock: + // If waiting on mutexes, wait groups, or condition variables, + // check if the synchronization primitive attached to the sudog is marked. + if gp.waiting != nil { + // Unmask the sema address and check if it's marked. + return checkIfMarked(gcUnmask(gp.waiting.elem)) + } + } + return true +} + +// unmask returns a *g object with an unmasked address. 
+// +//go:nosplit +func (gp *g) unmask() *g { + return (*g)(gcUnmask(unsafe.Pointer(gp))) +} + +// mask returns a *g object with a masked address. +// +//go:nosplit +func (gp *g) mask() *g { + return (*g)(gcMask(unsafe.Pointer(gp))) +} + +// Check to see if more blocked but marked goroutines exist; +// if so add them into root set and increment work.markrootJobs accordingly +// return true if we need to run another phase of markroots; return false otherwise +func gcDiscoverMoreStackRoots() { + // to begin with we have a set of unchecked stackRoots between + // vIndex and ivIndex. During the loop, anything < vIndex should be + // valid stackRoots and anything >= ivIndex should be invalid stackRoots + // and the loop terminates when the two indices meet + var vIndex, ivIndex int = work.nLiveStackRoots, work.nStackRoots + + // Reorder goroutine list + for vIndex < ivIndex { + gp := work.stackRoots[vIndex] + if gp.maybeLive() { + work.stackRoots[vIndex] = gp + vIndex = vIndex + 1 + continue + } + for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { + if swapGp := work.stackRoots[ivIndex]; swapGp.maybeLive() { + work.stackRoots[ivIndex] = gp + work.stackRoots[vIndex] = swapGp.unmask() + vIndex = vIndex + 1 + break + } + } + } + + var oldRootJobs int32 = int32(atomic.Load(&work.markrootJobs)) + var newRootJobs int32 = int32(work.baseStacks) + int32(vIndex) + + if newRootJobs > oldRootJobs { + // reset markrootNext as it could have been incremented past markrootJobs + work.nLiveStackRoots = vIndex + atomic.Store(&work.markrootJobs, uint32(newRootJobs)) + } +} + +// detectDeadlocks scans the remaining stackRoots and marks any which are +// blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). +// Returns true if goroutine leak was performed (or unnecessary). +// Returns false if the GC cycle has not yet reached a fix point for reachable goroutines. +func detectDeadlocks() bool { + // Report deadlocks and mark them unreachable, and resume marking + // we still need to mark these unreachable *g structs as they + // get reused, but their stack won't get scanned + if work.nLiveStackRoots == work.nStackRoots { + // nStackRoots == nLiveStackRoots means that all goroutines are marked. + return true + } + + // Try to reach another fix point here. Keep scouting for runnable goroutines until + // none are left. + // Valid goroutines may be found after all GC work is drained. + // Make sure these are pushed to the runnable set and ready to be marked. + var foundMoreWork bool + for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { + gp := work.stackRoots[i].unmask() + if readgstatus(gp) == _Gwaiting && !gp.maybeLive() { + // Blocking unrunnable goroutines will be skipped. + continue + } + work.stackRoots[i] = work.stackRoots[work.nLiveStackRoots] + work.stackRoots[work.nLiveStackRoots] = gp + work.nLiveStackRoots += 1 + // We now have one more markroot job. + work.markrootJobs += 1 + // We might still have some work to do. + // Make sure in the next iteration we will check re-check for new runnable goroutines. + foundMoreWork = true + } + if foundMoreWork { + // We found more work, so we need to resume the marking phase. + return false + } + + // For the remaining goroutines, mark them as unreachable and deadlocking. + for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { + gp := work.stackRoots[i].unmask() + casgstatus(gp, _Gwaiting, _Gdeadlocked) + fn := findfunc(gp.startpc) + if fn.valid() { + print("goroutine leak! 
goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + } else { + print("goroutine leak! goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") + } + traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp) + println() + work.stackRoots[i] = gp + } + // Put the remaining roots as ready for marking and drain them. + work.markrootJobs += uint32(work.nStackRoots - work.nLiveStackRoots) + work.nLiveStackRoots = work.nStackRoots + return true +} + // World must be stopped and mark assists and background workers must be // disabled. func gcMarkTermination(stw worldStop) { @@ -1185,6 +1454,13 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { + if goexperiment.DeadlockGC { + // Pull the GC out of deadlock detection mode. + // Write is thread-safe because the world is stopped, and only one + // GC cycle can run at a time. + work.deadlockDetectionMode = false + } + // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as // sched.idleTime and sched.totaltime. memstats also include @@ -1612,10 +1888,12 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if work.markrootNext < work.markrootJobs { - return true // root scan work available + if !work.deadlockDetectionMode { + return work.markrootNext < work.markrootJobs } - return false + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + return rootNext < rootJobs } // gcMark runs the mark (or, for concurrent GC, mark termination) @@ -1628,8 +1906,10 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. - if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if work.full != 0 || rootNext < rootJobs { + print("runtime: full=", hex(work.full), " next=", rootNext, " jobs=", rootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index a136c7aeaceda2..1e13ecc396d293 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -51,8 +51,86 @@ const ( // Must be a multiple of the pageInUse bitmap element size and // must also evenly divide pagesPerArena. pagesPerSpanRoot = 512 + + gcUndoBitMask = uintptr(uintptrMask >> 2) // This constant reserves some bits of the address space for the GC to use in order to mask addresses. + gcBitMask = ^gcUndoBitMask // This flips every bit in gcUndoBitMask of uinptr width ) +// gcMask masks addresses that should not be automatically marked during the GC. +// +//go:nosplit +func gcMask(p unsafe.Pointer) unsafe.Pointer { + if goexperiment.DeadlockGC { + return unsafe.Pointer(uintptr(p) | gcBitMask) + } + return p +} + +// gcUnmask undoes the bit-mask applied to a pointer. 
+// +//go:nosplit +func gcUnmask(p unsafe.Pointer) unsafe.Pointer { + if goexperiment.DeadlockGC { + return unsafe.Pointer(uintptr(p) & gcUndoBitMask) + } + return p +} + +// internalBlocked returns true if the goroutine is blocked due to a +// non-deadlocking waitReason, e.g. waiting for the netpoller or garbage collector. +// Such goroutines should never be considered for deadlock detection. +// +//go:nosplit +func (gp *g) internalBlocked() bool { + reason := gp.waitreason + return reason != waitReasonChanReceive && + reason != waitReasonSyncWaitGroupWait && + reason != waitReasonChanSend && + reason != waitReasonChanReceiveNilChan && + reason != waitReasonChanSendNilChan && + reason != waitReasonSelect && + reason != waitReasonSelectNoCases && + reason != waitReasonSyncMutexLock && + reason != waitReasonSyncRWMutexRLock && + reason != waitReasonSyncRWMutexLock && + reason != waitReasonSyncCondWait +} + +// The world must be stopped or allglock must be held. +// go through the snapshot of allgs, putting them into an arrays, +// separated by index, where [0:blockedIndex] contains only running Gs +// allGs[blockedIndex:] contain only blocking Gs +// To avoid GC from marking and scanning the blocked Gs by scanning +// the returned array (which is heap allocated), we mask the highest +// bit of the pointers to Gs with gcBitMask. +func allGsSnapshotSortedForGC() ([]*g, int) { + assertWorldStoppedOrLockHeld(&allglock) + + allgsSorted := make([]*g, len(allgs)) + + // Indices cutting off runnable and blocked Gs. + var currIndex, blockedIndex = 0, len(allgsSorted) - 1 + for _, gp := range allgs { + gp = gp.unmask() + // not sure if we need atomic load because we are stopping the world, + // but do it just to be safe for now + if status := readgstatus(gp); status != _Gwaiting || gp.internalBlocked() { + allgsSorted[currIndex] = gp + currIndex++ + } else { + allgsSorted[blockedIndex] = gp.mask() + blockedIndex-- + } + } + + // Because the world is stopped or allglock is held, allgadd + // cannot happen concurrently with this. allgs grows + // monotonically and existing entries never change, so we can + // simply return a copy of the slice header. For added safety, + // we trim everything past len because that can still change. + return allgsSorted, blockedIndex + 1 +} + // gcPrepareMarkRoots queues root scanning jobs (stacks, globals, and // some miscellany) and initializes scanning-related state. // @@ -102,11 +180,23 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. 
- work.stackRoots = allGsSnapshot() + if goexperiment.DeadlockGC { + if work.deadlockDetectionMode { + work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() + } else { + // regular GC --- scan every go routine + work.stackRoots = allGsSnapshot() + work.nLiveStackRoots = len(work.stackRoots) + } + } else { + // regular GC --- scan every go routine + work.stackRoots = allGsSnapshot() + work.nLiveStackRoots = len(work.stackRoots) + } work.nStackRoots = len(work.stackRoots) work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) + work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nLiveStackRoots) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) @@ -119,8 +209,10 @@ func gcPrepareMarkRoots() { // gcMarkRootCheck checks that all roots have been scanned. It is // purely for debugging. func gcMarkRootCheck() { - if work.markrootNext < work.markrootJobs { - print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n") + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if rootNext < rootJobs { + print(rootNext, " of ", rootJobs, " markroot jobs done\n") throw("left over markroot jobs") } @@ -868,7 +960,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: // ok } @@ -1136,6 +1228,32 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) } +func gcUpdateMarkrootNext() (uint32, bool) { + var success bool + var next uint32 = atomic.Load(&work.markrootNext) + var jobs uint32 = atomic.Load(&work.markrootJobs) + + if next < jobs { + // still work available at the moment + for !success { + success = atomic.Cas(&work.markrootNext, next, next+1) + // We manage to snatch a root job. Return the root index. + if success { + return next, true + } + + // Get the latest value of markrootNext. + next = atomic.Load(&work.markrootNext) + jobs := atomic.Load(&work.markrootJobs) + // We are out of markroot jobs. + if next >= jobs { + break + } + } + } + return 0, false +} + // gcDrain scans roots and objects in work buffers, blackening grey // objects until it is unable to get more work. It may return before // GC is done; it's the caller's responsibility to balance work from @@ -1194,13 +1312,16 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - // Drain root marking jobs. - if work.markrootNext < work.markrootJobs { + rootNext := atomic.Load(&work.markrootNext) + rootJobs := atomic.Load(&work.markrootJobs) + if rootNext < rootJobs { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. + // + // Continue unconditionally if we're draining partial deadlocks. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job >= work.markrootJobs { + job, success := gcUpdateMarkrootNext() + if !success { break } markroot(gcw, job, flushBgCredit) @@ -1346,9 +1467,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { wbBufFlush() if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. 
- if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { + if atomic.Load(&work.markrootNext) < atomic.Load(&work.markrootJobs) { + job, success := gcUpdateMarkrootNext() + if success { workFlushed += markroot(gcw, job, false) continue } @@ -1513,6 +1634,14 @@ func scanobject(b uintptr, gcw *gcWork) { // At this point we have extracted the next potential pointer. // Quickly filter out nil and pointers back to the current object. if obj != 0 && obj-b >= n { + if goexperiment.DeadlockGC { + // The GC will skip masked addresses if DeadlockGC is enabled. + if (uintptr(unsafe.Pointer(obj)) & gcBitMask) == gcBitMask { + // Skip masked pointers. + continue + } + } + // Test if obj points into the Go heap and, if so, // mark the object. // diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index c41c3558359c0c..f2e98898536676 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. @@ -269,6 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, + _Gdeadlocked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index b41bbe93cf57c7..08de431e1423ac 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -8,6 +8,7 @@ import ( "internal/abi" "internal/cpu" "internal/goarch" + "internal/goexperiment" "internal/goos" "internal/runtime/atomic" "internal/runtime/exithook" @@ -689,7 +690,7 @@ func allgadd(gp *g) { } lock(&allglock) - allgs = append(allgs, gp) + allgs = append(allgs, gp.mask()) if &allgs[0] != allgptr { atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0])) } @@ -708,6 +709,11 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. + if goexperiment.DeadlockGC { + for i, gp := range allgs { + allgs[i] = gp.unmask() + } + } return allgs[:len(allgs):len(allgs)] } @@ -729,7 +735,7 @@ func atomicAllGIndex(ptr **g, i uintptr) *g { func forEachG(fn func(gp *g)) { lock(&allglock) for _, gp := range allgs { - fn(gp) + fn(gp.unmask()) } unlock(&allglock) } @@ -742,7 +748,7 @@ func forEachGRace(fn func(gp *g)) { ptr, length := atomicAllG() for i := uintptr(0); i < length; i++ { gp := atomicAllGIndex(ptr, i) - fn(gp) + fn(gp.unmask()) } return } @@ -1208,6 +1214,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, + _Gscandeadlocked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1228,6 +1235,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, + _Gdeadlocked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 527611f96a29d9..7e5d27603eef8a 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,6 +87,9 @@ const ( // ready()ing this G. _Gpreempted // 9 + // _Gdeadlocked represents a deadlocked goroutine caught by the GC. 
+ _Gdeadlocked // 10 + // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. The // goroutine is not executing user code and the stack is owned @@ -104,6 +107,8 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 + + _Gscandeadlocked = _Gscan + _Gdeadlocked // 0x100a ) const ( @@ -1159,12 +1164,26 @@ func (w waitReason) String() string { return waitReasonStrings[w] } +// isMutexWait returns true if the goroutine is blocked because of +// sync.Mutex.Lock or sync.RWMutex.[R]Lock. +// +//go:nosplit func (w waitReason) isMutexWait() bool { return w == waitReasonSyncMutexLock || w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } +// isSyncWait returns true if the goroutine is blocked because of +// sync library primitive operations. +// +//go:nosplit +func (w waitReason) isSyncWait() bool { + return w == waitReasonSyncWaitGroupWait || + w == waitReasonSyncCondWait || + w.isMutexWait() +} + func (w waitReason) isWaitingForSuspendG() bool { return isWaitingForSuspendG[w] } diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 6af49b1b0c42d9..8927dfe262160e 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -21,6 +21,7 @@ package runtime import ( "internal/cpu" + "internal/goexperiment" "internal/runtime/atomic" "unsafe" ) @@ -188,7 +189,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes i } // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. - root.queue(addr, s, lifo) + root.queue(addr, s, lifo, reason.isSyncWait()) goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes) if s.ticket != 0 || cansemacquire(addr) { break @@ -301,9 +302,18 @@ func cansemacquire(addr *uint32) bool { } // queue adds s to the blocked goroutines in semaRoot. -func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { +func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() - s.elem = unsafe.Pointer(addr) + pAddr := unsafe.Pointer(addr) + if goexperiment.DeadlockGC { + if syncSema { + // Mask the addr so it doesn't get marked during GC + // through marking of the treap or marking of the blocked goroutine + pAddr = gcMask(unsafe.Pointer(addr)) + s.g.waiting = s + } + } + s.elem = pAddr s.next = nil s.prev = nil s.waiters = 0 @@ -311,7 +321,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == unsafe.Pointer(addr) { + if t.elem == pAddr { // Already have addr in list. if lifo { // Substitute s in t's place in treap. @@ -357,7 +367,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { return } last = t - if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) { + if uintptr(pAddr) < uintptr(t.elem) { pt = &t.prev } else { pt = &t.next @@ -402,6 +412,25 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) { ps := &root.treap s := *ps + + if goexperiment.DeadlockGC { + // First try to find a masked address. + var pAddr unsafe.Pointer = gcMask(unsafe.Pointer(addr)) + for ; s != nil; s = *ps { + if s.elem == pAddr { + goto Found + } + if uintptr(pAddr) < uintptr(s.elem) { + ps = &s.prev + } else { + ps = &s.next + } + } + // Otherwise, try to find an unmasked address. 
+ ps = &root.treap + s = *ps + } + for ; s != nil; s = *ps { if s.elem == unsafe.Pointer(addr) { goto Found @@ -470,6 +499,9 @@ Found: } tailtime = s.acquiretime } + if goexperiment.DeadlockGC { + s.g.waiting = nil + } s.parent = nil s.elem = nil s.next = nil @@ -590,6 +622,10 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() + if goexperiment.DeadlockGC { + s.elem = gcMask(unsafe.Pointer(l)) + s.g.waiting = s + } s.ticket = t s.releasetime = 0 t0 := int64(0) @@ -607,6 +643,10 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } + if goexperiment.DeadlockGC { + s.g.waiting = nil + s.elem = nil + } releaseSudog(s) } diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e5593c8..44f28971f89de0 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1199,14 +1199,15 @@ func elideWrapperCalling(id abi.FuncID) bool { } var gStatusStrings = [...]string{ - _Gidle: "idle", - _Grunnable: "runnable", - _Grunning: "running", - _Gsyscall: "syscall", - _Gwaiting: "waiting", - _Gdead: "dead", - _Gcopystack: "copystack", - _Gpreempted: "preempted", + _Gidle: "idle", + _Grunnable: "runnable", + _Grunning: "running", + _Gsyscall: "syscall", + _Gwaiting: "waiting", + _Gdead: "dead", + _Gcopystack: "copystack", + _Gdeadlocked: "deadlocked", + _Gpreempted: "preempted", } func goroutineheader(gp *g) { diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 03ec81fc0262a1..e04f012001dd60 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted: + case _Gwaiting, _Gpreempted, _Gdeadlocked: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the From 352dc0d30e58e8e09a25e357c2531ffe59b6aef6 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 16 Jul 2025 14:12:21 +0200 Subject: [PATCH 2/8] Corrected sema dequeue implementation. --- src/runtime/sema.go | 49 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 8927dfe262160e..d7288580c96611 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -321,7 +321,13 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == pAddr { + var cmp bool + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) + } else { + cmp = uintptr(pAddr) == uintptr(t.elem) + } + if cmp { // Already have addr in list. if lifo { // Substitute s in t's place in treap. @@ -367,7 +373,12 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if uintptr(pAddr) < uintptr(t.elem) { + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) + } else { + cmp = uintptr(pAddr) < uintptr(t.elem) + } + if cmp { pt = &t.prev } else { pt = &t.next @@ -413,29 +424,23 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) ps := &root.treap s := *ps - if goexperiment.DeadlockGC { - // First try to find a masked address. 
- var pAddr unsafe.Pointer = gcMask(unsafe.Pointer(addr)) - for ; s != nil; s = *ps { - if s.elem == pAddr { - goto Found - } - if uintptr(pAddr) < uintptr(s.elem) { - ps = &s.prev - } else { - ps = &s.next - } - } - // Otherwise, try to find an unmasked address. - ps = &root.treap - s = *ps - } - for ; s != nil; s = *ps { - if s.elem == unsafe.Pointer(addr) { + var cmp bool + if goexperiment.DeadlockGC { + cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) + } else { + cmp = unsafe.Pointer(addr) == s.elem + } + if cmp { goto Found } - if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) { + + if goexperiment.DeadlockGC { + cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) + } else { + cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) + } + if cmp { ps = &s.prev } else { ps = &s.next From f903068cda7d7cd846a1f1fa81d22966a6c4c97a Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Wed, 16 Jul 2025 14:47:02 +0200 Subject: [PATCH 3/8] Ordered wait reasons for easier checks. --- src/runtime/mgcmark.go | 12 +----------- src/runtime/runtime2.go | 16 +++++++--------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 1e13ecc396d293..65a5485c274f0f 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -83,17 +83,7 @@ func gcUnmask(p unsafe.Pointer) unsafe.Pointer { //go:nosplit func (gp *g) internalBlocked() bool { reason := gp.waitreason - return reason != waitReasonChanReceive && - reason != waitReasonSyncWaitGroupWait && - reason != waitReasonChanSend && - reason != waitReasonChanReceiveNilChan && - reason != waitReasonChanSendNilChan && - reason != waitReasonSelect && - reason != waitReasonSelectNoCases && - reason != waitReasonSyncMutexLock && - reason != waitReasonSyncRWMutexRLock && - reason != waitReasonSyncRWMutexLock && - reason != waitReasonSyncCondWait + return reason < waitReasonChanReceiveNilChan || waitReasonSyncWaitGroupWait < reason } // The world must be stopped or allglock must be held. 
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 7e5d27603eef8a..b0237a29657d3e 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1061,24 +1061,24 @@ const ( waitReasonZero waitReason = iota // "" waitReasonGCAssistMarking // "GC assist marking" waitReasonIOWait // "IO wait" - waitReasonChanReceiveNilChan // "chan receive (nil chan)" - waitReasonChanSendNilChan // "chan send (nil chan)" waitReasonDumpingHeap // "dumping heap" waitReasonGarbageCollection // "garbage collection" waitReasonGarbageCollectionScan // "garbage collection scan" waitReasonPanicWait // "panicwait" - waitReasonSelect // "select" - waitReasonSelectNoCases // "select (no cases)" waitReasonGCAssistWait // "GC assist wait" waitReasonGCSweepWait // "GC sweep wait" waitReasonGCScavengeWait // "GC scavenge wait" - waitReasonChanReceive // "chan receive" - waitReasonChanSend // "chan send" waitReasonFinalizerWait // "finalizer wait" waitReasonForceGCIdle // "force gc (idle)" waitReasonUpdateGOMAXPROCSIdle // "GOMAXPROCS updater (idle)" waitReasonSemacquire // "semacquire" waitReasonSleep // "sleep" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" waitReasonSyncCondWait // "sync.Cond.Wait" waitReasonSyncMutexLock // "sync.Mutex.Lock" waitReasonSyncRWMutexRLock // "sync.RWMutex.RLock" @@ -1179,9 +1179,7 @@ func (w waitReason) isMutexWait() bool { // //go:nosplit func (w waitReason) isSyncWait() bool { - return w == waitReasonSyncWaitGroupWait || - w == waitReasonSyncCondWait || - w.isMutexWait() + return waitReasonSyncCondWait <= w && w <= waitReasonSyncWaitGroupWait } func (w waitReason) isWaitingForSuspendG() bool { From f8a6d93a33d6b233f88f49fe92d1ed60271ef70a Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 17 Jul 2025 13:16:35 +0200 Subject: [PATCH 4/8] Optimized bitmask check in scanobject. --- src/runtime/mgcmark.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 65a5485c274f0f..37c44b0c04a7c6 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -1623,15 +1623,9 @@ func scanobject(b uintptr, gcw *gcWork) { // At this point we have extracted the next potential pointer. // Quickly filter out nil and pointers back to the current object. - if obj != 0 && obj-b >= n { - if goexperiment.DeadlockGC { - // The GC will skip masked addresses if DeadlockGC is enabled. - if (uintptr(unsafe.Pointer(obj)) & gcBitMask) == gcBitMask { - // Skip masked pointers. - continue - } - } - + // The GC will skip masked addresses if DeadlockGC is enabled. + if obj != 0 && obj-b >= n && + (!goexperiment.DeadlockGC || obj <= gcUndoBitMask) { // Test if obj points into the Go heap and, if so, // mark the object. // From 67a74ab2f5bdc2f122d9b0a52de29576927f539e Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Thu, 17 Jul 2025 16:31:19 +0200 Subject: [PATCH 5/8] Renamed deadlockgc to golfgc and deadlocks to goroutine leaks to avoid confusion with global deadlock. 
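
As of this patch, the user-facing entry point is runtime.FindGoLeaks and the
experiment flag is GOEXPERIMENT=golfgc. For illustration only (not part of this
change), a minimal sketch of a program the detector is expected to flag,
assuming a toolchain built with this series and GOEXPERIMENT=golfgc:

    package main

    import (
        "runtime"
        "time"
    )

    func main() {
        go func() {
            ch := make(chan int)
            ch <- 42 // ch never escapes this goroutine, so the send blocks forever
        }()
        // Crude synchronization for the sketch: give the child time to block.
        time.Sleep(100 * time.Millisecond)
        // Runs a GC cycle in goroutine leak detection mode; the blocked
        // goroutine is reported with a "goroutine leak!" line and its
        // status transitions to the leaked state.
        runtime.FindGoLeaks()
    }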
--- .../goexperiment/exp_deadlockgc_off.go | 8 -- .../goexperiment/exp_deadlockgc_on.go | 8 -- src/internal/goexperiment/exp_golfgc_off.go | 8 ++ src/internal/goexperiment/exp_golfgc_on.go | 8 ++ src/internal/goexperiment/flags.go | 4 +- src/runtime/mgc.go | 97 ++++++++++--------- src/runtime/mgcmark.go | 22 ++--- src/runtime/preempt.go | 4 +- src/runtime/proc.go | 2 +- src/runtime/runtime2.go | 6 +- src/runtime/sema.go | 17 ++-- src/runtime/traceback.go | 18 ++-- src/runtime/tracestatus.go | 2 +- 13 files changed, 104 insertions(+), 100 deletions(-) delete mode 100644 src/internal/goexperiment/exp_deadlockgc_off.go delete mode 100644 src/internal/goexperiment/exp_deadlockgc_on.go create mode 100644 src/internal/goexperiment/exp_golfgc_off.go create mode 100644 src/internal/goexperiment/exp_golfgc_on.go diff --git a/src/internal/goexperiment/exp_deadlockgc_off.go b/src/internal/goexperiment/exp_deadlockgc_off.go deleted file mode 100644 index 185171f4414c6c..00000000000000 --- a/src/internal/goexperiment/exp_deadlockgc_off.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build !goexperiment.deadlockgc - -package goexperiment - -const DeadlockGC = false -const DeadlockGCInt = 0 diff --git a/src/internal/goexperiment/exp_deadlockgc_on.go b/src/internal/goexperiment/exp_deadlockgc_on.go deleted file mode 100644 index 5c2b07ca55653b..00000000000000 --- a/src/internal/goexperiment/exp_deadlockgc_on.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build goexperiment.deadlockgc - -package goexperiment - -const DeadlockGC = true -const DeadlockGCInt = 1 diff --git a/src/internal/goexperiment/exp_golfgc_off.go b/src/internal/goexperiment/exp_golfgc_off.go new file mode 100644 index 00000000000000..547d1b4ffcda95 --- /dev/null +++ b/src/internal/goexperiment/exp_golfgc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.golfgc + +package goexperiment + +const GolfGC = false +const GolfGCInt = 0 diff --git a/src/internal/goexperiment/exp_golfgc_on.go b/src/internal/goexperiment/exp_golfgc_on.go new file mode 100644 index 00000000000000..830b9015ccbf07 --- /dev/null +++ b/src/internal/goexperiment/exp_golfgc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.golfgc + +package goexperiment + +const GolfGC = true +const GolfGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 969f975dd4c9e3..2175b6ca09d679 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -130,6 +130,6 @@ type Flags struct { // GreenTeaGC enables the Green Tea GC implementation. GreenTeaGC bool - // DeadlockGC enables the Deadlock GC implementation. - DeadlockGC bool + // GolfGC enables the Deadlock GC implementation. + GolfGC bool } diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b7105aa965f62a..709bda103c37e5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -373,32 +373,35 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. 
// - // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots) - // during deadlock detection GC, nLiveStackRoots is the number of stackRoots + // During normal GC cycle, nStackRoots == nLiveStackRoots == len(stackRoots); + // during goroutine leak detection, nLiveStackRoots is the number of stackRoots // to examine, and nStackRoots == len(stackRoots), which include goroutines that are // unmarked / not runnable nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nLiveStackRoots int - // The GC has performed deadlock detection during this GC cycle. - detectedDeadlocks bool - - // Is set to true by DetectDeadlocks(), instructing the next GC cycle to perform deadlock detection. - pendingDeadlockDetection bool - - // When set, the GC is running in deadlock detection mode. - // This can be triggered with a runtime flag. - deadlockDetectionMode bool + // The following fields monitor the GC phase of the current cycle during + // goroutine leak detection. + // + // - pendingGoleakDetection: The GC has been instructed to perform goroutine leak + // detection during the next GC cycle; it is set by DetectGoroutineLeaks() + // and unset during gcStart(). + // - detectingGoleaks: The GC is running in goroutine leak detection mode; it is set + // during gcStart() and unset during gcMarkTermination(). + // - detectedGoleaks: The GC has performed goroutine leak detection during the current + // GC cycle; it is set during gcMarkDone(), right after goroutine leak detection has concluded, + // and unset during gcStart(). + pendingGoleakDetection, detectingGoleaks, detectedGoleaks bool // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 // stackRoots is a snapshot of all of the Gs that existed before the - // beginning of concurrent marking. During deadlock detection GC, stackRoots + // beginning of concurrent marking. During goroutine leak detection, stackRoots // is partitioned into two sets; to the left of nLiveStackRoots are stackRoots // of running / runnable goroutines and to the right of nLiveStackRoots are // stackRoots of unmarked / not runnable goroutines // gcDiscoverMoreStackRoots modifies the stackRoots array to redo the partition - // after each marking phase + // after each marking phase iteration. stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -565,25 +568,25 @@ func GC() { releasem(mp) } -// DetectDeadlocks instructs the Go garbage collector to attempt -// partial deadlock detection. +// FindGoleaks instructs the Go garbage collector to attempt +// goroutine leak detection during the next GC cycle. // -// Only operates if deadlockgc is enabled in GOEXPERIMENT. +// Only operates if golfgc is enabled in GOEXPERIMENT. // Otherwise, it just runs runtime.GC(). -func DetectDeadlocks() { - if !goexperiment.DeadlockGC { +func FindGoLeaks() { + if !goexperiment.GolfGC { GC() return } // This write should be thread-safe, as the overwritten value is true. - // pendingDeadlockDetection is only set to false under STW at the start + // pendingGoleakDetection is only set to false under STW at the start // of the GC cycle that picks it up. - work.pendingDeadlockDetection = true + work.pendingGoleakDetection = true // This read should be thread-safe for the same reason as the write above above. // At most, we trigger the GC an additional time. 
- for work.pendingDeadlockDetection { + for work.pendingGoleakDetection { GC() } } @@ -733,8 +736,8 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if goexperiment.DeadlockGC { - if work.pendingDeadlockDetection { + } else if goexperiment.GolfGC { + if work.pendingGoleakDetection { // Fully stop the world if running deadlock detection. mode = gcForceBlockMode } @@ -800,7 +803,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) - work.detectedDeadlocks = false + work.detectedGoleaks = false // Assists and workers can start the moment we start // the world. @@ -832,11 +835,11 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) - if goexperiment.DeadlockGC { - if work.pendingDeadlockDetection { + if goexperiment.GolfGC { + if work.pendingGoleakDetection { // Write is thread-safe because the world is stopped - work.deadlockDetectionMode = true - work.pendingDeadlockDetection = false + work.detectingGoleaks = true + work.pendingGoleakDetection = false } } @@ -940,8 +943,8 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) - if goexperiment.DeadlockGC { - if work.deadlockDetectionMode { + if goexperiment.GolfGC { + if work.detectingGoleaks { gcDiscoverMoreStackRoots() } } @@ -1050,11 +1053,13 @@ top: }) semrelease(&worldsema) goto top - } else if goexperiment.DeadlockGC { - // Otherwise, do a deadlock detection round. - // Only do one deadlock detection round per GC cycle. - if work.deadlockDetectionMode && !work.detectedDeadlocks { - work.detectedDeadlocks = detectDeadlocks() + } else if goexperiment.GolfGC { + // If we are detecting goroutine leaks, do so now. + if work.detectingGoleaks && !work.detectedGoleaks { + // Detect goroutine leaks. If the returned value is true, then + // detection was performed during this cycle. Otherwise, more mark work is needed, + // or live goroutines were found. + work.detectedGoleaks = findGoleaks() getg().m.preemptoff = "" systemstack(func() { @@ -1243,12 +1248,12 @@ func gcDiscoverMoreStackRoots() { } } -// detectDeadlocks scans the remaining stackRoots and marks any which are +// findGoleaks scans the remaining stackRoots and marks any which are // blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). -// Returns true if goroutine leak was performed (or unnecessary). -// Returns false if the GC cycle has not yet reached a fix point for reachable goroutines. -func detectDeadlocks() bool { - // Report deadlocks and mark them unreachable, and resume marking +// Returns true if the goroutine leak check was performed (or unnecessary). +// Returns false if the GC cycle has not yet computed all (maybe-)live goroutines. +func findGoleaks() bool { + // Report goroutine leaks and mark them unreachable, and resume marking // we still need to mark these unreachable *g structs as they // get reused, but their stack won't get scanned if work.nLiveStackRoots == work.nStackRoots { @@ -1281,10 +1286,10 @@ func detectDeadlocks() bool { return false } - // For the remaining goroutines, mark them as unreachable and deadlocking. + // For the remaining goroutines, mark them as unreachable and leaked. for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i].unmask() - casgstatus(gp, _Gwaiting, _Gdeadlocked) + casgstatus(gp, _Gwaiting, _Gleaked) fn := findfunc(gp.startpc) if fn.valid() { print("goroutine leak! 
goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes\n") @@ -1454,11 +1459,11 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { - if goexperiment.DeadlockGC { - // Pull the GC out of deadlock detection mode. + if goexperiment.GolfGC { + // Pull the GC out of goroutine leak detection mode. // Write is thread-safe because the world is stopped, and only one // GC cycle can run at a time. - work.deadlockDetectionMode = false + work.detectingGoleaks = false } // The memstats updated above must be updated with the world @@ -1888,7 +1893,7 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if !work.deadlockDetectionMode { + if !work.detectingGoleaks { return work.markrootNext < work.markrootJobs } rootNext := atomic.Load(&work.markrootNext) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 37c44b0c04a7c6..1f0911d38d624c 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -60,7 +60,7 @@ const ( // //go:nosplit func gcMask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { return unsafe.Pointer(uintptr(p) | gcBitMask) } return p @@ -70,15 +70,15 @@ func gcMask(p unsafe.Pointer) unsafe.Pointer { // //go:nosplit func gcUnmask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { return unsafe.Pointer(uintptr(p) & gcUndoBitMask) } return p } -// internalBlocked returns true if the goroutine is blocked due to a -// non-deadlocking waitReason, e.g. waiting for the netpoller or garbage collector. -// Such goroutines should never be considered for deadlock detection. +// internalBlocked returns true if the goroutine is blocked due to an +// internal (non-leaking) waitReason, e.g. waiting for the netpoller or garbage collector. +// Such goroutines are never leak detection candidates according to the GC. // //go:nosplit func (gp *g) internalBlocked() bool { @@ -170,8 +170,8 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - if goexperiment.DeadlockGC { - if work.deadlockDetectionMode { + if goexperiment.GolfGC { + if work.detectingGoleaks { work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() } else { // regular GC --- scan every go routine @@ -950,7 +950,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // ok } @@ -1307,8 +1307,6 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { if rootNext < rootJobs { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. - // - // Continue unconditionally if we're draining partial deadlocks. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { job, success := gcUpdateMarkrootNext() if !success { @@ -1623,9 +1621,9 @@ func scanobject(b uintptr, gcw *gcWork) { // At this point we have extracted the next potential pointer. // Quickly filter out nil and pointers back to the current object. - // The GC will skip masked addresses if DeadlockGC is enabled. + // The GC will skip masked addresses if GolfGC is enabled. 
if obj != 0 && obj-b >= n && - (!goexperiment.DeadlockGC || obj <= gcUndoBitMask) { + (!goexperiment.GolfGC || obj <= gcUndoBitMask) { // Test if obj points into the Go heap and, if so, // mark the object. // diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index f2e98898536676..586abc433ffc0b 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting, _Gdeadlocked: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. @@ -269,7 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, - _Gdeadlocked | _Gscan, + _Gleaked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 08de431e1423ac..14954787376c63 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -709,7 +709,7 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { for i, gp := range allgs { allgs[i] = gp.unmask() } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index b0237a29657d3e..1e17195ec26603 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,8 +87,8 @@ const ( // ready()ing this G. _Gpreempted // 9 - // _Gdeadlocked represents a deadlocked goroutine caught by the GC. - _Gdeadlocked // 10 + // _Gleaked represents a deadlocked goroutine caught by the GC. + _Gleaked // 10 // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. 
The @@ -108,7 +108,7 @@ const ( _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - _Gscandeadlocked = _Gscan + _Gdeadlocked // 0x100a + _Gscandeadlocked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/sema.go b/src/runtime/sema.go index d7288580c96611..69a98a24cfe316 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -305,7 +305,7 @@ func cansemacquire(addr *uint32) bool { func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() pAddr := unsafe.Pointer(addr) - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { if syncSema { // Mask the addr so it doesn't get marked during GC // through marking of the treap or marking of the blocked goroutine @@ -322,7 +322,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { pt := &root.treap for t := *pt; t != nil; t = *pt { var cmp bool - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) == uintptr(t.elem) @@ -373,7 +373,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) < uintptr(t.elem) @@ -426,7 +426,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) for ; s != nil; s = *ps { var cmp bool - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) } else { cmp = unsafe.Pointer(addr) == s.elem @@ -435,7 +435,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) goto Found } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) } else { cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) @@ -504,7 +504,7 @@ Found: } tailtime = s.acquiretime } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { s.g.waiting = nil } s.parent = nil @@ -627,7 +627,8 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. 
s := acquireSudog() s.g = getg() - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { + // Storing this pointer is s.elem = gcMask(unsafe.Pointer(l)) s.g.waiting = s } @@ -648,7 +649,7 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } - if goexperiment.DeadlockGC { + if goexperiment.GolfGC { s.g.waiting = nil s.elem = nil } diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 44f28971f89de0..bbfd169a4c2260 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1199,15 +1199,15 @@ func elideWrapperCalling(id abi.FuncID) bool { } var gStatusStrings = [...]string{ - _Gidle: "idle", - _Grunnable: "runnable", - _Grunning: "running", - _Gsyscall: "syscall", - _Gwaiting: "waiting", - _Gdead: "dead", - _Gcopystack: "copystack", - _Gdeadlocked: "deadlocked", - _Gpreempted: "preempted", + _Gidle: "idle", + _Grunnable: "runnable", + _Grunning: "running", + _Gsyscall: "syscall", + _Gwaiting: "waiting", + _Gdead: "dead", + _Gcopystack: "copystack", + _Gleaked: "deadlocked", + _Gpreempted: "preempted", } func goroutineheader(gp *g) { diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index e04f012001dd60..8b5eafd170f488 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted, _Gdeadlocked: + case _Gwaiting, _Gpreempted, _Gleaked: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the From d985e99b25c5dd0bff2db365770a54be08aeeffc Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 18 Jul 2025 12:21:18 +0200 Subject: [PATCH 6/8] Fixed status text for leaked goroutines. --- src/runtime/runtime2.go | 4 ++-- src/runtime/traceback.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 1e17195ec26603..0227c723de64bd 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,7 +87,7 @@ const ( // ready()ing this G. _Gpreempted // 9 - // _Gleaked represents a deadlocked goroutine caught by the GC. + // _Gleaked represents a leaked goroutine caught by the GC. _Gleaked // 10 // _Gscan combined with one of the above states other than @@ -108,7 +108,7 @@ const ( _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - _Gscandeadlocked = _Gscan + _Gleaked // 0x100a + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index bbfd169a4c2260..e8fef35da7d104 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1206,7 +1206,7 @@ var gStatusStrings = [...]string{ _Gwaiting: "waiting", _Gdead: "dead", _Gcopystack: "copystack", - _Gleaked: "deadlocked", + _Gleaked: "leaked", _Gpreempted: "preempted", } From 76ff6cfae027f55c09e3377318f169174dab5204 Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Fri, 18 Jul 2025 12:23:09 +0200 Subject: [PATCH 7/8] Fixed bad goroutine status. 
--- src/runtime/proc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 14954787376c63..3990d8af13c450 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1214,7 +1214,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, - _Gscandeadlocked, + _Gscanleaked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1235,7 +1235,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, - _Gdeadlocked, + _Gleaked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) From e115ccefb05eb5763c3225e89aaa17233f0dfdbd Mon Sep 17 00:00:00 2001 From: Vlad Saioc Date: Mon, 21 Jul 2025 16:11:30 +0200 Subject: [PATCH 8/8] Addressing some of the comments. --- .../goexperiment/exp_goleakfindergc_off.go | 8 + .../goexperiment/exp_goleakfindergc_on.go | 8 + src/internal/goexperiment/exp_golfgc_off.go | 8 - src/internal/goexperiment/exp_golfgc_on.go | 8 - src/internal/goexperiment/flags.go | 4 +- src/runtime/mbitmap.go | 22 +++ src/runtime/mgc.go | 143 ++++++------------ src/runtime/mgcmark.go | 19 +-- src/runtime/proc.go | 2 +- src/runtime/runtime2.go | 3 +- src/runtime/sema.go | 23 +-- 11 files changed, 107 insertions(+), 141 deletions(-) create mode 100644 src/internal/goexperiment/exp_goleakfindergc_off.go create mode 100644 src/internal/goexperiment/exp_goleakfindergc_on.go delete mode 100644 src/internal/goexperiment/exp_golfgc_off.go delete mode 100644 src/internal/goexperiment/exp_golfgc_on.go diff --git a/src/internal/goexperiment/exp_goleakfindergc_off.go b/src/internal/goexperiment/exp_goleakfindergc_off.go new file mode 100644 index 00000000000000..1a141fd5b7cfc7 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = false +const GoroutineLeakFinderGCInt = 0 diff --git a/src/internal/goexperiment/exp_goleakfindergc_on.go b/src/internal/goexperiment/exp_goleakfindergc_on.go new file mode 100644 index 00000000000000..8c816645927656 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = true +const GoroutineLeakFinderGCInt = 1 diff --git a/src/internal/goexperiment/exp_golfgc_off.go b/src/internal/goexperiment/exp_golfgc_off.go deleted file mode 100644 index 547d1b4ffcda95..00000000000000 --- a/src/internal/goexperiment/exp_golfgc_off.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build !goexperiment.golfgc - -package goexperiment - -const GolfGC = false -const GolfGCInt = 0 diff --git a/src/internal/goexperiment/exp_golfgc_on.go b/src/internal/goexperiment/exp_golfgc_on.go deleted file mode 100644 index 830b9015ccbf07..00000000000000 --- a/src/internal/goexperiment/exp_golfgc_on.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. 
- -//go:build goexperiment.golfgc - -package goexperiment - -const GolfGC = true -const GolfGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 2175b6ca09d679..53d6e92d37e7da 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -130,6 +130,6 @@ type Flags struct { // GreenTeaGC enables the Green Tea GC implementation. GreenTeaGC bool - // GolfGC enables the Deadlock GC implementation. - GolfGC bool + // GoroutineLeakFinderGC enables the Deadlock GC implementation. + GoroutineLeakFinderGC bool } diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 7331886af27632..2661f878ecc3c6 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1247,6 +1247,28 @@ func markBitsForSpan(base uintptr) (mbits markBits) { return mbits } +// isMarkedOrNotInHeap returns true if a pointer is in the heap and marked, +// or if the pointer is not in the heap. Used by goroutine leak detection +// to determine if concurrency resources are reachable in memory. +func isMarkedOrNotInHeap(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + + // If we fall through to get here, the object is not in the heap. + // In this case, it is either a pointer to a stack object or a global resource. + // Treat it as reachable in memory by default, to be safe. + // + // (vsaioc) TODO: we could possibly be more precise by only checking against the stacks + // of runnable goroutines. I don't think this is necessary, based on what we've seen, but + // let's keep the option open in case the runtime evolves. + // This will (naively) lead to quadratic blow-up for goroutine leak detection, + // but if it is only run on demand, maybe the extra cost is not a show-stopper. + return true +} + // advance advances the markBits to the next object in the span. func (m *markBits) advance() { if m.mask == 1<<7 { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 709bda103c37e5..0f09ca067bcad3 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -381,16 +381,18 @@ type workType struct { // The following fields monitor the GC phase of the current cycle during // goroutine leak detection. - // - // - pendingGoleakDetection: The GC has been instructed to perform goroutine leak - // detection during the next GC cycle; it is set by DetectGoroutineLeaks() - // and unset during gcStart(). - // - detectingGoleaks: The GC is running in goroutine leak detection mode; it is set - // during gcStart() and unset during gcMarkTermination(). - // - detectedGoleaks: The GC has performed goroutine leak detection during the current - // GC cycle; it is set during gcMarkDone(), right after goroutine leak detection has concluded, - // and unset during gcStart(). - pendingGoleakDetection, detectingGoleaks, detectedGoleaks bool + goroutineLeakFinder struct { + // The GC has been instructed to perform goroutine leak detection during the next GC cycle; + // it is set by DetectGoroutineLeaks() and unset during gcStart(). + pending atomic.Bool + // The GC is running in goroutine leak detection mode; it is set during gcStart() + // and unset during gcMarkTermination(). Is protected by STW. + enabled bool + // The GC has performed goroutine leak detection during the current GC cycle; it is set + // during gcMarkDone(), right after goroutine leak detection has concluded, and unset during + // gcStart(). Is protected by STW. 
+ done bool + } // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 @@ -571,22 +573,17 @@ func GC() { // FindGoleaks instructs the Go garbage collector to attempt // goroutine leak detection during the next GC cycle. // -// Only operates if golfgc is enabled in GOEXPERIMENT. +// Only operates if goroutineleakfindergc is enabled in GOEXPERIMENT. // Otherwise, it just runs runtime.GC(). func FindGoLeaks() { - if !goexperiment.GolfGC { + if !goexperiment.GoroutineLeakFinderGC { GC() return } - // This write should be thread-safe, as the overwritten value is true. - // pendingGoleakDetection is only set to false under STW at the start - // of the GC cycle that picks it up. - work.pendingGoleakDetection = true + work.goroutineLeakFinder.pending.Store(true) - // This read should be thread-safe for the same reason as the write above above. - // At most, we trigger the GC an additional time. - for work.pendingGoleakDetection { + for work.goroutineLeakFinder.pending.Load() { GC() } } @@ -736,8 +733,8 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode - } else if goexperiment.GolfGC { - if work.pendingGoleakDetection { + } else if goexperiment.GoroutineLeakFinderGC { + if work.goroutineLeakFinder.pending.Load() { // Fully stop the world if running deadlock detection. mode = gcForceBlockMode } @@ -803,7 +800,7 @@ func gcStart(trigger gcTrigger) { clearpools() work.cycles.Add(1) - work.detectedGoleaks = false + work.goroutineLeakFinder.done = false // Assists and workers can start the moment we start // the world. @@ -835,12 +832,9 @@ func gcStart(trigger gcTrigger) { // possible. setGCPhase(_GCmark) - if goexperiment.GolfGC { - if work.pendingGoleakDetection { - // Write is thread-safe because the world is stopped - work.detectingGoleaks = true - work.pendingGoleakDetection = false - } + if work.goroutineLeakFinder.pending.Load() { + work.goroutineLeakFinder.enabled = true + work.goroutineLeakFinder.pending.Store(false) } gcBgMarkPrepare() // Must happen before assists are enabled. @@ -943,10 +937,8 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) - if goexperiment.GolfGC { - if work.detectingGoleaks { - gcDiscoverMoreStackRoots() - } + if work.goroutineLeakFinder.enabled { + gcDiscoverMoreStackRoots() } top: @@ -1007,7 +999,8 @@ top: // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be // shaded. Transition to mark termination. - var now int64 + now := nanotime() + work.tMarkTerm = now getg().m.preemptoff = "gcing" var stw worldStop systemstack(func() { @@ -1053,44 +1046,13 @@ top: }) semrelease(&worldsema) goto top - } else if goexperiment.GolfGC { + } else if goexperiment.GoroutineLeakFinderGC { // If we are detecting goroutine leaks, do so now. - if work.detectingGoleaks && !work.detectedGoleaks { + if work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done { // Detect goroutine leaks. If the returned value is true, then // detection was performed during this cycle. Otherwise, more mark work is needed, // or live goroutines were found. - work.detectedGoleaks = findGoleaks() - - getg().m.preemptoff = "" - systemstack(func() { - // Accumulate the time we were stopped before we had to start again. 
- work.cpuStats.accumulateGCPauseTime(nanotime()-stw.finishedStopping, work.maxprocs) - - now := startTheWorldWithSema(0, stw) - work.pauseNS += now - stw.startedStopping - }) - semrelease(&worldsema) - goto top - } - - now = nanotime() - work.tMarkTerm = now - // Check again whether any P needs to flush its write barrier - // to the GC work queue. - systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } - }) - - // If that is the case, restart again. Once restarts are no longer needed, - // run this without deadlock detection. - if restart { - gcDebugMarkDone.restartedDueTo27993 = true + work.goroutineLeakFinder.done = findGoleaks() getg().m.preemptoff = "" systemstack(func() { @@ -1141,24 +1103,11 @@ top: gcMarkTermination(stw) } -// Check if an object is marked in the heap. -func checkIfMarked(p unsafe.Pointer) bool { - obj, span, objIndex := findObject(uintptr(p), 0, 0) - if obj != 0 { - mbits := span.markBitsForIndex(objIndex) - return mbits.isMarked() - } - // if we fall through to get here, we are within the stack ranges of reachable goroutines - return true -} - -// maybeLive checks whether a goroutine may still be semantically runnable. -// This returns true if the goroutine is waiting on at least one concurrency primitive -// which is reachable in memory, i.e., has been by the GC. -// +// checkIfMaybeRunnable checks whether a goroutine may still be semantically runnable. // For goroutines which are semantically runnable, this will eventually return true -// as the GC marking phase progresses. -func (gp *g) maybeLive() bool { +// as the GC marking phase progresses. It returns false for leaked goroutines, or for +// goroutines which are not yet computed as possibly runnable by the GC. +func (gp *g) checkIfMaybeRunnable() bool { // Unmask the goroutine address to ensure we are not // dereferencing a masked address. gp = gp.unmask() @@ -1176,7 +1125,7 @@ func (gp *g) maybeLive() bool { // Cycle all through all *sudog to check whether // the goroutine is waiting on a marked channel. for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if checkIfMarked(unsafe.Pointer(sg.c)) { + if isMarkedOrNotInHeap(unsafe.Pointer(sg.c)) { return true } } @@ -1190,7 +1139,7 @@ func (gp *g) maybeLive() bool { // check if the synchronization primitive attached to the sudog is marked. if gp.waiting != nil { // Unmask the sema address and check if it's marked. - return checkIfMarked(gcUnmask(gp.waiting.elem)) + return isMarkedOrNotInHeap(gcUnmask(gp.waiting.elem)) } } return true @@ -1223,13 +1172,13 @@ func gcDiscoverMoreStackRoots() { // Reorder goroutine list for vIndex < ivIndex { gp := work.stackRoots[vIndex] - if gp.maybeLive() { + if gp.checkIfMaybeRunnable() { work.stackRoots[vIndex] = gp vIndex = vIndex + 1 continue } for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { - if swapGp := work.stackRoots[ivIndex]; swapGp.maybeLive() { + if swapGp := work.stackRoots[ivIndex]; swapGp.checkIfMaybeRunnable() { work.stackRoots[ivIndex] = gp work.stackRoots[vIndex] = swapGp.unmask() vIndex = vIndex + 1 @@ -1268,7 +1217,7 @@ func findGoleaks() bool { var foundMoreWork bool for i := work.nLiveStackRoots; i < work.nStackRoots; i++ { gp := work.stackRoots[i].unmask() - if readgstatus(gp) == _Gwaiting && !gp.maybeLive() { + if readgstatus(gp) == _Gwaiting && !gp.checkIfMaybeRunnable() { // Blocking unrunnable goroutines will be skipped. 
continue } @@ -1459,12 +1408,8 @@ func gcMarkTermination(stw worldStop) { } systemstack(func() { - if goexperiment.GolfGC { - // Pull the GC out of goroutine leak detection mode. - // Write is thread-safe because the world is stopped, and only one - // GC cycle can run at a time. - work.detectingGoleaks = false - } + // Pull the GC out of goroutine leak detection mode. + work.goroutineLeakFinder.enabled = false // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as @@ -1893,12 +1838,12 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if !work.detectingGoleaks { - return work.markrootNext < work.markrootJobs - } rootNext := atomic.Load(&work.markrootNext) rootJobs := atomic.Load(&work.markrootJobs) - return rootNext < rootJobs + if rootNext < rootJobs { + return true // root scan work available + } + return false } // gcMark runs the mark (or, for concurrent GC, mark termination) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 1f0911d38d624c..557b20ee918cc0 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -60,7 +60,7 @@ const ( // //go:nosplit func gcMask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { return unsafe.Pointer(uintptr(p) | gcBitMask) } return p @@ -70,7 +70,7 @@ func gcMask(p unsafe.Pointer) unsafe.Pointer { // //go:nosplit func gcUnmask(p unsafe.Pointer) unsafe.Pointer { - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { return unsafe.Pointer(uintptr(p) & gcUndoBitMask) } return p @@ -170,19 +170,14 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - if goexperiment.GolfGC { - if work.detectingGoleaks { - work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() - } else { - // regular GC --- scan every go routine - work.stackRoots = allGsSnapshot() - work.nLiveStackRoots = len(work.stackRoots) - } + if work.goroutineLeakFinder.enabled { + work.stackRoots, work.nLiveStackRoots = allGsSnapshotSortedForGC() } else { // regular GC --- scan every go routine work.stackRoots = allGsSnapshot() work.nLiveStackRoots = len(work.stackRoots) } + work.nStackRoots = len(work.stackRoots) work.markrootNext = 0 @@ -1621,9 +1616,9 @@ func scanobject(b uintptr, gcw *gcWork) { // At this point we have extracted the next potential pointer. // Quickly filter out nil and pointers back to the current object. - // The GC will skip masked addresses if GolfGC is enabled. + // The GC will skip masked addresses if GoroutineLeakFinderGC is enabled. if obj != 0 && obj-b >= n && - (!goexperiment.GolfGC || obj <= gcUndoBitMask) { + (!goexperiment.GoroutineLeakFinderGC || obj <= gcUndoBitMask) { // Test if obj points into the Go heap and, if so, // mark the object. // diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 3990d8af13c450..3c0a5144d2cc86 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -709,7 +709,7 @@ func allGsSnapshot() []*g { // monotonically and existing entries never change, so we can // simply return a copy of the slice header. For added safety, // we trim everything past len because that can still change. 
- if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { for i, gp := range allgs { allgs[i] = gp.unmask() } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 0227c723de64bd..fe0aad02a01818 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -107,8 +107,7 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 - - _Gscanleaked = _Gscan + _Gleaked // 0x100a + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 69a98a24cfe316..08167ff217a6a6 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -305,7 +305,7 @@ func cansemacquire(addr *uint32) bool { func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() pAddr := unsafe.Pointer(addr) - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { if syncSema { // Mask the addr so it doesn't get marked during GC // through marking of the treap or marking of the blocked goroutine @@ -322,7 +322,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { pt := &root.treap for t := *pt; t != nil; t = *pt { var cmp bool - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(pAddr)) == uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) == uintptr(t.elem) @@ -373,7 +373,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { return } last = t - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(pAddr)) < uintptr(gcUnmask(t.elem)) } else { cmp = uintptr(pAddr) < uintptr(t.elem) @@ -426,7 +426,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) for ; s != nil; s = *ps { var cmp bool - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = gcUnmask(unsafe.Pointer(addr)) == gcUnmask(s.elem) } else { cmp = unsafe.Pointer(addr) == s.elem @@ -435,7 +435,7 @@ func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) goto Found } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { cmp = uintptr(gcUnmask(unsafe.Pointer(addr))) < uintptr(gcUnmask(s.elem)) } else { cmp = uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) @@ -504,7 +504,8 @@ Found: } tailtime = s.acquiretime } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear the waiting pointer. s.g.waiting = nil } s.parent = nil @@ -627,8 +628,10 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() - if goexperiment.GolfGC { - // Storing this pointer is + if goexperiment.GoroutineLeakFinderGC { + // Storing this pointer (masked) so that we can trace + // the condvar address from the blocked goroutine when + // checking for goroutine leaks. s.elem = gcMask(unsafe.Pointer(l)) s.g.waiting = s } @@ -649,7 +652,9 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } - if goexperiment.GolfGC { + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear up its waiting pointer, + // and clean up the sudog before releasing it. s.g.waiting = nil s.elem = nil }
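
For reviewers, a minimal end-to-end sketch of how the new API is meant to be exercised. This is illustrative only: it assumes the tree is built with GOEXPERIMENT=goroutineleakfindergc (so that runtime.FindGoLeaks does more than fall back to runtime.GC()), and it assumes leaked goroutines are surfaced through the usual goroutine dump with the "leaked" status string introduced in this series. The sleep is just a crude way to let the child goroutine park before the detection cycle runs.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"time"
)

// leak starts a goroutine that blocks forever on a channel that is
// unreachable (except through the blocked goroutine itself) as soon
// as leak returns.
func leak() {
	ch := make(chan int)
	go func() {
		<-ch // no sender can ever exist once leak returns
	}()
}

func main() {
	leak()

	// Crude synchronization: give the child goroutine time to park
	// on the channel receive before requesting a detection cycle.
	time.Sleep(100 * time.Millisecond)

	// Request a GC cycle with goroutine leak detection. With the
	// experiment disabled this is equivalent to runtime.GC().
	runtime.FindGoLeaks()

	// The leaked goroutine should now appear in the goroutine dump
	// with the "leaked" status rather than a blocked wait reason.
	pprof.Lookup("goroutine").WriteTo(os.Stderr, 2)
}

Note that the loop in FindGoLeaks (retrying GC while goroutineLeakFinder.pending is still set) means a caller can rely on at least one full detection cycle having completed by the time the call returns, which is what the sketch depends on before dumping goroutines.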