diff --git a/src/internal/goexperiment/exp_goleakfindergc_off.go b/src/internal/goexperiment/exp_goleakfindergc_off.go new file mode 100644 index 00000000000000..1a141fd5b7cfc7 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = false +const GoroutineLeakFinderGCInt = 0 diff --git a/src/internal/goexperiment/exp_goleakfindergc_on.go b/src/internal/goexperiment/exp_goleakfindergc_on.go new file mode 100644 index 00000000000000..8c816645927656 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = true +const GoroutineLeakFinderGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index d0ae75d4e1a1a3..f009c50e48fd12 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -133,4 +133,7 @@ type Flags struct { // RandomizedHeapBase enables heap base address randomization on 64-bit // platforms. RandomizedHeapBase64 bool + + // GoroutineLeakFinderGC enables the Deadlock GC implementation. + GoroutineLeakFinderGC bool } diff --git a/src/runtime/chan.go b/src/runtime/chan.go index bb554ebfdb1f3a..3fe5d635333a3f 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -263,11 +263,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. - mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) @@ -298,7 +298,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) if closed { if c.closed == 0 { @@ -336,9 +336,9 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } } - if sg.elem != nil { + if sg.elem.get() != nil { sendDirect(c.elemtype, sg, ep) - sg.elem = nil + sg.elem.set(nil) } gp := sg.g unlockf() @@ -395,7 +395,7 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // Once we read sg.elem out of sg, it will no longer // be updated if the destination's stack gets copied (shrunk). // So make sure that no preemption points can happen between read & use. - dst := sg.elem + dst := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) // No need for cgo write barrier checks because dst is always // Go memory. @@ -406,7 +406,7 @@ func recvDirect(t *_type, sg *sudog, dst unsafe.Pointer) { // dst is on our stack or the heap, src is on another stack. // The channel is locked, so src will not move during this // operation. 
- src := sg.elem + src := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) memmove(dst, src, t.Size_) } @@ -441,9 +441,9 @@ func closechan(c *hchan) { if sg == nil { break } - if sg.elem != nil { - typedmemclr(c.elemtype, sg.elem) - sg.elem = nil + if sg.elem.get() != nil { + typedmemclr(c.elemtype, sg.elem.get()) + sg.elem.set(nil) } if sg.releasetime != 0 { sg.releasetime = cputicks() @@ -463,7 +463,7 @@ func closechan(c *hchan) { if sg == nil { break } - sg.elem = nil + sg.elem.set(nil) if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -642,13 +642,13 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. - mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil gp.waiting = mysg mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.param = nil c.recvq.enqueue(mysg) if c.timer != nil { @@ -680,7 +680,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } success := mysg.success gp.param = nil - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) return true, success } @@ -727,14 +727,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { typedmemmove(c.elemtype, ep, qp) } // copy data from sender to queue - typedmemmove(c.elemtype, qp, sg.elem) + typedmemmove(c.elemtype, qp, sg.elem.get()) c.recvx++ if c.recvx == c.dataqsiz { c.recvx = 0 } c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } - sg.elem = nil + sg.elem.set(nil) gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 8696672065c26a..e82b191f5d7b72 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -187,18 +187,18 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) cmd.Dir = "testdata/" + binary cmd = testenv.CleanCmdEnv(cmd) - // Add the rangefunc GOEXPERIMENT unconditionally since some tests depend on it. + // Add the rangefunc and goroutineleakfindergc GOEXPERIMENT unconditionally since some tests depend on it. // TODO(61405): Remove this once it's enabled by default. edited := false for i := range cmd.Env { e := cmd.Env[i] if _, vars, ok := strings.Cut(e, "GOEXPERIMENT="); ok { - cmd.Env[i] = "GOEXPERIMENT=" + vars + ",rangefunc" + cmd.Env[i] = "GOEXPERIMENT=" + vars + ",rangefunc,goroutineleakfindergc" edited = true } } if !edited { - cmd.Env = append(cmd.Env, "GOEXPERIMENT=rangefunc") + cmd.Env = append(cmd.Env, "GOEXPERIMENT=rangefunc,goroutineleakfindergc") } out, err := cmd.CombinedOutput() diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 2fc5b4a38a8e85..158890d7d1e543 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1222,7 +1222,7 @@ func (t *SemTable) Enqueue(addr *uint32) { s.releasetime = 0 s.acquiretime = 0 s.ticket = 0 - t.semTable.rootFor(addr).queue(addr, s, false) + t.semTable.rootFor(addr).queue(addr, s, false, false) } // Dequeue simulates dequeuing a waiter for a semaphore (or lock) at addr. 
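For orientation, here is a minimal usage sketch (not part of this patch, and assuming a toolchain built with GOEXPERIMENT=goroutineleakfindergc): a goroutine blocks forever on a channel send, the only other reference to the channel is dropped, and the runtime.FindGoLeaks API added in mgc.go below is called. The run should print a "goroutine leak!" report for the blocked goroutine, which is the same marker the gc_test.go additions below look for when run with GODEBUG=gcgoroutineleaks=1.

package main

import (
	"runtime"
	"time"
)

func main() {
	ch := make(chan int)
	go func() {
		ch <- 42 // blocks forever: there is no receiver
	}()
	time.Sleep(10 * time.Millisecond) // give the goroutine time to block on the send
	ch = nil                          // drop the last reference held by a runnable goroutine
	runtime.FindGoLeaks()             // run GC cycles until goroutine leak detection has been performed
}

Dropping ch matters: the detector only reports a goroutine as leaked when the primitives it is blocked on are unreachable from anything still runnable (see gcUntrackSyncObjects and checkIfMaybeRunnable in mgc.go below).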
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0a1e01cbcf9d7c..ba53487625df72 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -14,9 +14,11 @@ import ( "math/rand" "os" "reflect" + "regexp" "runtime" "runtime/debug" "slices" + "strconv" "strings" "sync" "sync/atomic" @@ -1095,3 +1097,420 @@ func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { t.Fatalf("expected %d symbolized locations, got:\n%s", wantSymbolizedLocations, got) } } + +// This tests the goroutine leak garbage collector. +func TestGoroutineLeakGC(t *testing.T) { + // Goroutine leak test case. + // + // Test cases can be configured with test name, the name of the entry point function, + // a set of expected leaks identified by regular expressions, and the number of times + // the test should be repeated. + // + // Repetitions are used to amortize flakiness in some tests. + type testCase struct { + name string + repetitions int + expectedLeaks map[*regexp.Regexp]bool + + // flakyLeaks are goroutine leaks that are too flaky to be reliably detected. + // Still, they might pop up every once in a while. + // If these occur, do not fail the test due to unexpected leaks. + flakyLeaks map[*regexp.Regexp]struct{} + } + + // makeTest is a short-hand for creating test cases. + // Each of the leaks in the list is identified by a regular expression. + // + // If a leak is the string "FLAKY", it notifies makeTest that any remaining + // leak patterns should be added to the flakyLeaks map. + makeTest := func( + cfg testCase, + leaks ...string) testCase { + tc := testCase{ + name: cfg.name, + expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)), + flakyLeaks: make(map[*regexp.Regexp]struct{}, len(leaks)), + } + // Default to 1 repetition if not specified. + // One extra rep for configured tests is irrelevant. + tc.repetitions = cfg.repetitions | 1 + + const ( + EXPECTED int = iota + FLAKY + ) + + mode := EXPECTED + for _, leak := range leaks { + if leak == "FLAKY" { + mode = FLAKY + continue + } + + switch mode { + case EXPECTED: + tc.expectedLeaks[regexp.MustCompile(leak)] = false + case FLAKY: + tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{} + } + } + return tc + } + + // Micro tests involve very simple leaks for each type of concurrency primitive operation. + microTests := []testCase{ + makeTest(testCase{name: "NilRecv"}, `\[chan receive \(nil chan\)\]`), + makeTest(testCase{name: "NilSend"}, `\[chan send \(nil chan\)\]`), + makeTest(testCase{name: "SelectNoCases"}, `\[select \(no cases\)\]`), + makeTest(testCase{name: "ChanRecv"}, `\[chan receive\]`), + makeTest(testCase{name: "ChanSend"}, `\[chan send\]`), + makeTest(testCase{name: "Select"}, `\[select\]`), + makeTest(testCase{name: "WaitGroup"}, `\[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "MutexStack"}, `\[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "MutexHeap"}, `\[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cond"}, `\[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "RWMutexRLock"}, `\[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "RWMutexLock"}, `\[sync\.(RW)?Mutex\.Lock\]`), + makeTest(testCase{name: "Mixed"}, `\[sync\.WaitGroup\.Wait\]`, `\[chan send\]`), + makeTest(testCase{name: "NoLeakGlobal"}), + } + + // Common goroutine leak patterns. 
+ // Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" + // doi:10.1109/CGO57630.2024.10444835 + patternTestCases := []testCase{ + makeTest(testCase{name: "NoCloseRange"}, + `main\.NoCloseRange\.gowrap1 .* \[chan send\]`, + `main\.noCloseRange\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "MethodContractViolation"}, + `main\.worker\.Start\.func1 .* \[select\]`), + makeTest(testCase{name: "DoubleSend"}, + `main\.DoubleSend\.func3 .* \[chan send\]`), + makeTest(testCase{name: "EarlyReturn"}, + `main\.earlyReturn\.func1 .* \[chan send\]`), + makeTest(testCase{name: "NCastLeak"}, + `main\.nCastLeak\.func1 .* \[chan send\]`, + `main\.NCastLeak\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Timeout"}, + `main\.timeout\.func1 .* \[chan send\]`), + } + + // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". + // White paper found at https://lujie.ac.cn/files/papers/GoBench.pdf + // doi:10.1109/CGO51591.2021.9370317. + // + // This list is curated for tests that are not excessively flaky. + gokerTestCases := []testCase{ + makeTest(testCase{name: "Cockroach584"}, + `main\.Cockroach584\.func2\.1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach1055"}, + `main\.Cockroach1055\.func2 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.1 .* \[chan receive\]`, + `main\.Cockroach1055\.func2\.2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Cockroach1462"}, + `main\.\(\*Stopper_cockroach1462\)\.RunWorker\.func1 .* \[chan send\]`, + `main\.Cockroach1462\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Cockroach2448"}, + `main\.Cockroach2448\.func2\.gowrap1 .* \[select\]`, + `main\.Cockroach2448\.func2\.gowrap2 .* \[select\]`), + makeTest(testCase{name: "Cockroach3710"}, + `main\.Cockroach3710\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.\(\*Store_cockroach3710\)\.processRaft\.func1 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Cockroach6181", repetitions: 50}, + `main\.testRangeCacheCoalescedRequests_cockroach6181 .* \[sync\.WaitGroup\.Wait\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.Lock\]`, + `main\.testRangeCacheCoalescedRequests_cockroach6181\.func1\.1 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Cockroach7504", repetitions: 100}, + `main\.Cockroach7504\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Cockroach7504\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach9935"}, + `main\.Cockroach9935\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach10214"}, + `main\.Cockroach10214\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Cockroach10214\.func2\.2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Cockroach10790"}, + `main\.\(\*Replica_cockroach10790\)\.beginCmds\.func1 .* \[chan
receive\]`), + makeTest(testCase{name: "Cockroach13197"}, + `main\.\(\*DB_cockroach13197\)\.begin\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach13755"}, + `main\.\(\*Rows_cockroach13755\)\.initContextClose\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach16167"}, + `main\.Cockroach16167\.func2 .* \[sync\.RWMutex\.RLock\]`, + `main\.Cockroach16167\.func2\.gowrap1 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Cockroach18101"}, + `main\.restore_cockroach18101\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach24808"}, + `main\.Cockroach24808\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach25456"}, + `main\.Cockroach25456\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Cockroach35073"}, + `main\.Cockroach35073\.func2.1 .* \[chan send\]`, + `main\.Cockroach35073\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Cockroach35931"}, + `main\.Cockroach35931\.func2 .* \[chan send\]`), + makeTest(testCase{name: "Etcd5509"}, + `main\.Etcd5509\.func2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Etcd6857"}, + `main\.Etcd6857\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Etcd6873"}, + `main\.Etcd6873\.func2\.gowrap1 .* \[chan receive\]`, + `main\.newWatchBroadcasts_etcd6873\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Etcd7492"}, + `main\.Etcd7492\.func2 .* \[sync\.WaitGroup\.Wait\]`, + `main\.Etcd7492\.func2\.1 .* \[chan send\]`, + `main\.NewSimpleTokenTTLKeeper_etcd7492\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Etcd7902"}, + `main\.doRounds_etcd7902\.gowrap1 .* \[chan receive\]`, + `main\.doRounds_etcd7902\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.runElectionFunc_etcd7902 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Etcd10492"}, + `main\.Etcd10492\.func2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc660"}, + `main\.\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Grpc795"}, + `main\.\(\*test_grpc795\)\.startServer\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.testServerGracefulStopIdempotent_grpc795 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc862"}, + `main\.DialContext_grpc862\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1275"}, + `main\.testInflightStreamClosing_grpc1275\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1424"}, + `main\.DialContext_grpc1424\.func1 .* \[chan receive\]`), + makeTest(testCase{name: "Grpc1460"}, + `main\.Grpc1460\.func2\.gowrap1 .* \[chan receive\]`, + `main\.Grpc1460\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Grpc3017", repetitions: 50}, + // grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists. + // Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. 
+ `main\.Grpc3017\.func2 .* \[chan receive\]`, + `main\.Grpc3017\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Hugo3251", repetitions: 20}, + `main\.Hugo3251\.func2 .* \[sync\.WaitGroup\.Wait\]`, + `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Hugo3251\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Hugo5379"}, + `main\.\(\*Page_hugo5379\)\.initContent\.func1\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*Site_hugo5379\)\.renderPages\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Hugo5379\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Istio16224"}, + `main\.Istio16224\.func2 .* \[sync\.Mutex\.Lock\]`, + `main\.Istio16224\.func2\.gowrap1 .* \[chan send\]`, + // This is also a leak, but it is too flaky to be reliably detected. + `FLAKY`, + `main\.Istio16224\.func2\.gowrap1 .* \[chan receive\]`), + makeTest(testCase{name: "Istio17860"}, + `main\.\(\*agent_istio17860\)\.Restart\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Istio18454"}, + `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan receive\]`, + `main\.\(\*Worker_istio18454\)\.Start\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes1321"}, + `main\.NewMux_kubernetes1321\.gowrap1 .* \[chan send\]`, + `main\.testMuxWatcherClose_kubernetes1321 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes5316"}, + `main\.finishRequest_kubernetes5316\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes6632"}, + `main\.Kubernetes6632\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes6632\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes10182"}, + `main\.\(\*statusManager_kubernetes10182\)\.Start\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes10182\.func2\.gowrap2 .* \[chan send\]`, + `main\.Kubernetes10182\.func2\.gowrap3 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes11298"}, + `main\.After_kubernetes11298\.func1 .* \[chan receive\]`, + `main\.After_kubernetes11298\.func1 .* \[sync\.Cond\.Wait\]`, + `main\.Kubernetes11298\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Kubernetes13135"}, + `main\.Kubernetes13135\.func2 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Kubernetes25331"}, + `main\.Kubernetes25331\.func2\.gowrap1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes26980"}, + `main\.Kubernetes26980\.func2 .* \[chan receive\]`, + `main\.Kubernetes26980\.func2\.1 .* \[sync\.Mutex\.Lock\]`, + `main\.Kubernetes26980\.func2\.gowrap2 .* \[chan receive\]`), + makeTest(testCase{name: "Kubernetes30872"}, + `main\.\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*federatedInformerImpl_kubernetes30872\)\.Start\.gowrap2 .* \[sync\.Mutex\.Lock\]`, + `main\.\(\*NamespaceController_kubernetes30872\)\.Run\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes38669"}, + `main\.newCacheWatcher_kubernetes38669\.gowrap1 .* \[chan send\]`), + makeTest(testCase{name: "Kubernetes58107"}, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.Cond\.Wait\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.Cond\.Wait\]`, + `main\.\(\*ResourceQuotaController_kubernetes58107\)\.Run\.gowrap2 .* \[sync\.RWMutex\.RLock\]`, + 
`main\.startResourceQuotaController_kubernetes58107\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes62464"}, + `main\.Kubernetes62464\.func2\.gowrap1 .* \[sync\.RWMutex\.RLock\]`, + `main\.Kubernetes62464\.func2\.gowrap2 .* \[sync\.RWMutex\.Lock\]`), + makeTest(testCase{name: "Kubernetes70277"}, + `main\.Kubernetes70277\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Moby4395"}, + `main\.Go_moby4395\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Moby4951"}, + `main\.Moby4951\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Moby4951\.func2\.gowrap2 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby7559"}, + `main\.Moby7559\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby17176"}, + `main\.testDevmapperLockReleasedDeviceDeletion_moby17176\.func1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Moby21233"}, + `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[chan send\]`, + `main\.\(\*Transfer_moby21233\)\.Watch\.func1 .* \[select\]`, + `main\.testTransfer_moby21233 .* \[chan receive\]`), + makeTest(testCase{name: "Moby25348"}, + `main\.Moby25348\.func2\.gowrap1 .* \[sync\.WaitGroup\.Wait\]`), + makeTest(testCase{name: "Moby27782"}, + `main\.\(\*JSONFileLogger_moby27782\)\.ReadLogs\.gowrap1 .* \[sync\.Cond\.Wait\]`, + `main\.NewWatcher_moby27782\.gowrap1 .* \[select\]`), + makeTest(testCase{name: "Moby28462"}, + `main\.Moby28462\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`, + `main\.Moby28462\.func2\.gowrap2 .* \[chan send\]`), + makeTest(testCase{name: "Moby29733"}, + `main\.Moby29733\.func2 .* \[chan receive\]`, + `main\.testActive_moby29733\.func1 .* \[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "Moby30408"}, + `main\.Moby30408\.func2 .* \[chan receive\]`, + `main\.testActive_moby30408\.func1 .* \[sync\.Cond\.Wait\]`), + makeTest(testCase{name: "Moby33781"}, + `main\.monitor_moby33781\.func1 .* \[chan send\]`), + makeTest(testCase{name: "Moby36114"}, + `main\.Moby36114\.func2\.gowrap1 .* \[sync\.Mutex\.Lock\]`), + makeTest(testCase{name: "Serving2137"}, + `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[chan send\]`, + `main\.\(\*Breaker_serving2137\)\.concurrentRequest\.func1 .* \[sync\.Mutex\.Lock\]`, + `main\.Serving2137\.func2 .* \[chan receive\]`), + makeTest(testCase{name: "Syncthing4829"}, + `main\.Syncthing4829\.func2 .* \[sync\.RWMutex\.RLock\]`), + makeTest(testCase{name: "Syncthing5795"}, + `main\.\(\*rawConnection_syncthing5795\)\.Start\.func1 .* \[chan receive\]`, + `main\.Syncthing5795\.func2 .* \[chan receive\]`), + } + + // Combine all test cases into a single list. + testCases := append(microTests, patternTestCases...) + testCases = append(testCases, gokerTestCases...) + + // Test cases must not panic or cause fatal exceptions. + failStates := regexp.MustCompile(`fatal|panic`) + + // Build the test program once. + exe, err := buildTestProg(t, "testgoroutineleakgc") + if err != nil { + t.Fatal(fmt.Sprintf("building testgoroutineleakgc failed: %v", err)) + } + + for _, tcase := range testCases { + t.Run(tcase.name, func(t *testing.T) { + // Run tests in parallel. + t.Parallel() + + // Default to 1 repetition if not specified. + // One extra rep for tests with a specified number of repetitions + // is irrelevant. + repetitions := tcase.repetitions | 1 + + // Output trace. Aggregated across all repetitions. + var output string + // Output and trace are protected by separate mutexes to reduce contention. 
+ var outputMu sync.Mutex + var traceMu sync.RWMutex + // Wait group coordinates across all repetitions. + var wg sync.WaitGroup + + wg.Add(repetitions) + for i := 0; i < repetitions; i++ { + go func() { + defer wg.Done() + + // FIXME: Use GODEBUG flag only temporarily until we can use pprof/goroutineleaks. + repOutput := runBuiltTestProg(t, exe, tcase.name, "GODEBUG=gctrace=1,gcgoroutineleaks=1") + + // If the test case was not expected to produce leaks, but some were reported, + // stop the test immediately. Zero tolerance policy for false positives. + if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && strings.Contains(repOutput, "goroutine leak!") { + t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", repOutput) + } + + // Zero tolerance policy for fatal exceptions or panics. + if failStates.MatchString(repOutput) { + t.Errorf("output:\n%s\n\nunexpected fatal exception or panic", repOutput) + } + + // Parse the output line by line and look for the `goroutine leak!` message. + LINES: + for _, line := range strings.Split(repOutput, "\n") { + // We are not interested in anything else. + if !strings.Contains(line, "goroutine leak!") { + continue + } + + // Check if the leak is expected. + // If it is, check whether it has been encountered before. + var foundNew bool + var leakPattern *regexp.Regexp + traceMu.RLock() + for expectedLeak, ok := range tcase.expectedLeaks { + if expectedLeak.MatchString(line) { + if !ok { + foundNew = true + } + + leakPattern = expectedLeak + break + } + } + traceMu.RUnlock() + + if foundNew { + // Only bother writing if we found a new leak. + traceMu.Lock() + tcase.expectedLeaks[leakPattern] = true + traceMu.Unlock() + } + + if leakPattern == nil { + // We are dealing with a leak not marked as expected. + // Check if it is a flaky leak. + for flakyLeak := range tcase.flakyLeaks { + if flakyLeak.MatchString(line) { + // The leak is flaky. Carry on to the next line. + continue LINES + } + } + + t.Errorf("output:\n%s\n\nunexpected goroutine leak: %s", repOutput, line) + } + } + + outputMu.Lock() + output += "\nRepetition " + strconv.Itoa(i) + ":\n" + repOutput + "\n--------------------------\n" + outputMu.Unlock() + }() + } + + // Coordinate across all repetitions. + wg.Wait() + missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks)) + for expectedLeak, found := range tcase.expectedLeaks { + if !found { + missingLeakStrs = append(missingLeakStrs, expectedLeak.String()) + } + } + + if len(missingLeakStrs) > 0 { + t.Fatalf("output:\n%s\n\nnot enough goroutines leaks detected. Missing:\n%s", output, strings.Join(missingLeakStrs, ", ")) + } + }) + } +} diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 7331886af27632..2661f878ecc3c6 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1247,6 +1247,28 @@ func markBitsForSpan(base uintptr) (mbits markBits) { return mbits } +// isMarkedOrNotInHeap returns true if a pointer is in the heap and marked, +// or if the pointer is not in the heap. Used by goroutine leak detection +// to determine if concurrency resources are reachable in memory. +func isMarkedOrNotInHeap(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + + // If we fall through to get here, the object is not in the heap. + // In this case, it is either a pointer to a stack object or a global resource. + // Treat it as reachable in memory by default, to be safe. 
+ // + // (vsaioc) TODO: we could possibly be more precise by only checking against the stacks + // of runnable goroutines. I don't think this is necessary, based on what we've seen, but + // let's keep the option open in case the runtime evolves. + // This will (naively) lead to quadratic blow-up for goroutine leak detection, + // but if it is only run on demand, maybe the extra cost is not a show-stopper. + return true +} + // advance advances the markBits to the next object in the span. func (m *markBits) advance() { if m.mask == 1<<7 { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f2df1a00e0c683..5e0fa8a2581521 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -364,8 +364,8 @@ type workType struct { // (and thus 8-byte alignment even on 32-bit architectures). bytesMarked uint64 - markrootNext uint32 // next markroot job - markrootJobs uint32 // number of markroot jobs + markrootNext atomic.Uint32 // next markroot job + markrootJobs atomic.Uint32 // number of markroot jobs nproc uint32 tstart int64 @@ -373,17 +373,36 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // nStackRoots == len(stackRoots), but we have nStackRoots for - // consistency. - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int + // During a normal GC cycle, nStackRoots == nMaybeRunnableStackRoots == len(stackRoots); + // during goroutine leak detection, nMaybeRunnableStackRoots is the number of stackRoots + // scheduled for marking. + // In both variants, nStackRoots == len(stackRoots). + nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nMaybeRunnableStackRoots int + + // The following fields monitor the GC phase of the current cycle during + // goroutine leak detection. + goroutineLeakFinder struct { + // The GC has been instructed to perform goroutine leak detection during the next GC cycle; + // it is set by FindGoLeaks() and unset during gcStart(). + pending atomic.Bool + // The GC is running in goroutine leak detection mode; it is set during gcStart() + // and unset during gcMarkTermination(). Protected by STW. + enabled bool + // The GC has performed goroutine leak detection during the current GC cycle; it is set + // during gcMarkDone(), right after goroutine leak detection has concluded, and unset during + // gcMarkTermination(). Protected by STW. + done bool + } // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 - // stackRoots is a snapshot of all of the Gs that existed - // before the beginning of concurrent marking. The backing - // store of this must not be modified because it might be - // shared with allgs. + // stackRoots is a snapshot of all of the Gs that existed before the + // beginning of concurrent marking. During goroutine leak detection, stackRoots + // is partitioned into two sets; to the left of nMaybeRunnableStackRoots are stackRoots + // of running / runnable goroutines and to the right of nMaybeRunnableStackRoots are + // stackRoots of unmarked / not runnable goroutines. + // The stackRoots array is re-partitioned after each marking phase iteration. stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -550,6 +569,24 @@ func GC() { releasem(mp) } +// FindGoLeaks instructs the Go garbage collector to attempt +// goroutine leak detection during the next GC cycle. +// +// Only operates if goroutineleakfindergc is enabled in GOEXPERIMENT. +// Otherwise, it just runs runtime.GC().
+func FindGoLeaks() { + if !goexperiment.GoroutineLeakFinderGC { + GC() + return + } + + work.goroutineLeakFinder.pending.Store(true) + + for work.goroutineLeakFinder.pending.Load() { + GC() + } +} + // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -695,6 +732,10 @@ func gcStart(trigger gcTrigger) { mode = gcForceMode } else if debug.gcstoptheworld == 2 { mode = gcForceBlockMode + } else if work.goroutineLeakFinder.pending.Load() || debug.gcgoroutineleaks > 0 { + // If goroutine leak detection has been enabled (via GODEBUG=gcgoroutineleaks=1), + // or via profiling, stop the world during the marking phase. + mode = gcForceMode } // Ok, we're doing it! Stop everybody else @@ -772,6 +813,13 @@ func gcStart(trigger gcTrigger) { schedEnableUser(false) } + if work.goroutineLeakFinder.pending.Load() || + debug.gcgoroutineleaks > 0 { + work.goroutineLeakFinder.enabled = true + work.goroutineLeakFinder.pending.Store(false) + gcUntrackSyncObjects() + } + // Enter concurrent mark phase and enable // write barriers. // @@ -888,6 +936,9 @@ func gcMarkDone() { // Ensure only one thread is running the ragged barrier at a // time. semacquire(&work.markDoneSema) + if work.goroutineLeakFinder.enabled { + findMaybeRunnableGoroutines() + } top: // Re-check transition condition under transition lock. @@ -980,8 +1031,20 @@ top: } } }) - if restart { - gcDebugMarkDone.restartedDueTo27993 = true + + // Check whether we need to resume the marking phase because of issue #27993 + // or because of goroutine leak detection. + if restart || (work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done) { + if restart { + // Restart because of issue #27993. + gcDebugMarkDone.restartedDueTo27993 = true + } else { + // Marking has reached a fixed-point. Attempt to detect goroutine leaks. + // + // If the returned value is true, then detection was performed during this cycle. + // Otherwise, more runnable goroutines were discovered, requiring additional mark work. + work.goroutineLeakFinder.done = findGoleaks() + } getg().m.preemptoff = "" systemstack(func() { @@ -1032,6 +1095,150 @@ top: gcMarkTermination(stw) } +// checkIfMaybeRunnable checks whether a goroutine may still be semantically runnable. +// For goroutines which are semantically runnable, this will eventually return true +// as the GC marking phase progresses. It returns false for leaked goroutines, or for +// goroutines which are not yet computed as possibly runnable by the GC. +func (gp *g) checkIfMaybeRunnable() bool { + // Unmask the goroutine address to ensure we are not + // dereferencing a masked address. + switch gp.waitreason { + case waitReasonSelectNoCases, + waitReasonChanSendNilChan, + waitReasonChanReceiveNilChan: + // Select with no cases or communicating on nil channels + // make goroutines unrunnable by definition. + return false + case waitReasonChanReceive, + waitReasonSelect, + waitReasonChanSend: + // Cycle all through all *sudog to check whether + // the goroutine is waiting on a marked channel. 
+ for sg := gp.waiting; sg != nil; sg = sg.waitlink { + if isMarkedOrNotInHeap(unsafe.Pointer(sg.c.get())) { + return true + } + } + return false + case waitReasonSyncCondWait, + waitReasonSyncWaitGroupWait, + waitReasonSyncMutexLock, + waitReasonSyncRWMutexLock, + waitReasonSyncRWMutexRLock: + // If waiting on mutexes, wait groups, or condition variables, + // check if the synchronization primitive attached to the sudog is marked. + if gp.waiting != nil { + // Unmask the sema address and check if it's marked. + return isMarkedOrNotInHeap(gp.waiting.elem.get()) + } + } + return true +} + +// findMaybeRunnableGoroutines checks to see if more blocked but maybe-runnable goroutines exist. +// If so, it adds them to the root set and increments work.markrootJobs accordingly. +// Returns true if we need to run another phase of markroots; returns false otherwise. +func findMaybeRunnableGoroutines() (moreWork bool) { + oldRootJobs := work.markrootJobs.Load() + + // To begin with we have a set of unchecked stackRoots between + // vIndex and ivIndex. During the loop, anything < vIndex should be + // valid stackRoots and anything >= ivIndex should be invalid stackRoots. + // The loop terminates when the two indices meet. + var vIndex, ivIndex int = work.nMaybeRunnableStackRoots, work.nStackRoots + // Reorder goroutine list + for vIndex < ivIndex { + if work.stackRoots[vIndex].checkIfMaybeRunnable() { + vIndex = vIndex + 1 + continue + } + for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { + if gp := work.stackRoots[ivIndex]; gp.checkIfMaybeRunnable() { + work.stackRoots[ivIndex] = work.stackRoots[vIndex] + work.stackRoots[vIndex] = gp + vIndex = vIndex + 1 + break + } + } + } + + newRootJobs := work.baseStacks + uint32(vIndex) + if newRootJobs > oldRootJobs { + work.nMaybeRunnableStackRoots = vIndex + work.markrootJobs.Store(newRootJobs) + } + return newRootJobs > oldRootJobs +} + +// gcUntrackSyncObjects scans allgs and sets the elem and c fields of all sudogs to +// an untraceable pointer. This prevents the GC from marking these objects as live in memory +// by following these pointers when running deadlock detection. +func gcUntrackSyncObjects() { + assertWorldStopped() + + forEachGRace(func(gp *g) { + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + sg.elem.setUntraceable() + sg.c.setUntraceable() + } + }) +} + +// gcRestoreSyncObjects restores the elem and c fields of all sudogs to their original values. +// Should be invoked after the goroutine leak detection phase. +// +//go:nosplit +func gcRestoreSyncObjects() { + assertWorldStopped() + + forEachGRace(func(gp *g) { + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + sg.elem.setTraceable() + sg.c.setTraceable() + } + }) +} + +// findGoleaks scans the remaining stackRoots and marks any which are +// blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). +// Returns true if the goroutine leak check was performed (or unnecessary). +// Returns false if the GC cycle has not yet computed all maybe-runnable goroutines. +func findGoleaks() bool { + // Report goroutine leaks and mark the leaked goroutines, then resume marking. + // We still need to mark these unreachable *g structs as they + // get reused, but their stacks won't get scanned. + if work.nMaybeRunnableStackRoots == work.nStackRoots { + // nMaybeRunnableStackRoots == nStackRoots means that all goroutines are marked. + return true + } + + // Check whether any more maybe-runnable goroutines can be found by the GC.
+ if findMaybeRunnableGoroutines() { + // We found more work, so we need to resume the marking phase. + return false + } + + // For the remaining goroutines, mark them as unreachable and leaked. + for i := work.nMaybeRunnableStackRoots; i < work.nStackRoots; i++ { + gp := work.stackRoots[i] + casgstatus(gp, _Gwaiting, _Gleaked) + fn := findfunc(gp.startpc) + if fn.valid() { + print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", + "[", waitReasonStrings[gp.waitreason], "]\n") + } else { + print("goroutine leak! goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ", + "[", waitReasonStrings[gp.waitreason], "]\n") + } + traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp) + println() + } + // Put the remaining roots as ready for marking and drain them. + work.markrootJobs.Add(int32(work.nStackRoots - work.nMaybeRunnableStackRoots)) + work.nMaybeRunnableStackRoots = work.nStackRoots + return true +} + // World must be stopped and mark assists and background workers must be // disabled. func gcMarkTermination(stw worldStop) { @@ -1184,7 +1391,16 @@ func gcMarkTermination(stw worldStop) { throw("non-concurrent sweep failed to drain all sweep queues") } + if work.goroutineLeakFinder.enabled { + // Restore the elem and c fields of all sudogs to their original values. + gcRestoreSyncObjects() + } + systemstack(func() { + // Pull the GC out of goroutine leak detection mode. + work.goroutineLeakFinder.enabled = false + work.goroutineLeakFinder.done = false + // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as // sched.idleTime and sched.totaltime. memstats also include @@ -1258,7 +1474,11 @@ func gcMarkTermination(stw worldStop) { printlock() print("gc ", memstats.numgc, " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", - util, "%: ") + util, "%") + if work.goroutineLeakFinder.done { + print(" (goroutine leak finder GC)") + } + print(": ") prev := work.tSweepTerm for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { if i != 0 { @@ -1612,7 +1832,7 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if work.markrootNext < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { return true // root scan work available } return false @@ -1628,8 +1848,8 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. 
- if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + if work.full != 0 || work.markrootNext.Load() < work.markrootJobs.Load() { + print("runtime: full=", hex(work.full), " next=", work.markrootNext.Load(), " jobs=", work.markrootJobs.Load(), " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index a136c7aeaceda2..b416e0c43c7ec3 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -53,6 +53,56 @@ const ( pagesPerSpanRoot = 512 ) +// internalBlocked returns true if the goroutine is blocked due to an +// internal (non-leaking) waitReason, e.g. waiting for the netpoller or garbage collector. +// Such goroutines are never leak detection candidates according to the GC. +// +//go:nosplit +func (gp *g) internalBlocked() bool { + reason := gp.waitreason + return reason < waitReasonChanReceiveNilChan || waitReasonSyncWaitGroupWait < reason +} + +// The world must be stopped or allglock must be held. +// go through the snapshot of allgs, putting them into an arrays, +// separated by index, where [0:blockedIndex] contains only running Gs +// allGs[blockedIndex:] contain only blocking Gs +// To avoid GC from marking and scanning the blocked Gs by scanning +// the returned array (which is heap allocated), we mask the highest +// bit of the pointers to Gs with gcBitMask. +func allGsSnapshotSortedForGC() ([]*g, int) { + assertWorldStoppedOrLockHeld(&allglock) + + // Reset the status of leaked goroutines in order to improve + // the precision of goroutine leak detection. + for _, gp := range allgs { + gp.atomicstatus.CompareAndSwap(_Gleaked, _Gwaiting) + } + + allgsSorted := make([]*g, len(allgs)) + + // Indices cutting off runnable and blocked Gs. + var currIndex, blockedIndex = 0, len(allgsSorted) - 1 + for _, gp := range allgs { + // not sure if we need atomic load because we are stopping the world, + // but do it just to be safe for now + if status := readgstatus(gp); status != _Gwaiting || gp.internalBlocked() { + allgsSorted[currIndex] = gp + currIndex++ + } else { + allgsSorted[blockedIndex] = gp + blockedIndex-- + } + } + + // Because the world is stopped or allglock is held, allgadd + // cannot happen concurrently with this. allgs grows + // monotonically and existing entries never change, so we can + // simply return a copy of the slice header. For added safety, + // we trim everything past len because that can still change. + return allgsSorted, blockedIndex + 1 +} + // gcPrepareMarkRoots queues root scanning jobs (stacks, globals, and // some miscellany) and initializes scanning-related state. // @@ -102,11 +152,20 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - work.stackRoots = allGsSnapshot() + if work.goroutineLeakFinder.enabled { + // goroutine leak finder GC --- only prepare runnable + // goroutines for marking. 
+ work.stackRoots, work.nMaybeRunnableStackRoots = allGsSnapshotSortedForGC() + } else { + // regular GC --- scan every goroutine + work.stackRoots = allGsSnapshot() + work.nMaybeRunnableStackRoots = len(work.stackRoots) + } + work.nStackRoots = len(work.stackRoots) - work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) + work.markrootNext.Store(0) + work.markrootJobs.Store(uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nMaybeRunnableStackRoots)) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) @@ -119,8 +178,8 @@ func gcPrepareMarkRoots() { // gcMarkRootCheck checks that all roots have been scanned. It is // purely for debugging. func gcMarkRootCheck() { - if work.markrootNext < work.markrootJobs { - print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n") + if work.markrootNext.Load() < work.markrootJobs.Load() { + print(work.markrootNext.Load(), " of ", work.markrootJobs.Load(), " markroot jobs done\n") throw("left over markroot jobs") } @@ -868,7 +927,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // ok } @@ -1136,6 +1195,30 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) } +func gcUpdateMarkrootNext() (uint32, bool) { + var success bool + next, jobs := work.markrootNext.Load(), work.markrootJobs.Load() + + if next < jobs { + // still work available at the moment + for !success { + success = work.markrootNext.CompareAndSwap(next, next+1) + // We manage to snatch a root job. Return the root index. + if success { + return next, true + } + + // Get the latest value of markrootNext. + next = work.markrootNext.Load() + // We are out of markroot jobs. + if next >= jobs { + break + } + } + } + return 0, false +} + // gcDrain scans roots and objects in work buffers, blackening grey // objects until it is unable to get more work. It may return before // GC is done; it's the caller's responsibility to balance work from @@ -1194,13 +1277,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - // Drain root marking jobs. - if work.markrootNext < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job >= work.markrootJobs { + job, success := gcUpdateMarkrootNext() + if !success { break } markroot(gcw, job, flushBgCredit) @@ -1346,9 +1428,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { wbBufFlush() if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. - if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { + job, success := gcUpdateMarkrootNext() + if success { workFlushed += markroot(gcw, job, false) continue } @@ -1512,6 +1594,7 @@ func scanobject(b uintptr, gcw *gcWork) { // At this point we have extracted the next potential pointer. // Quickly filter out nil and pointers back to the current object. 
+ // The GC will skip masked addresses if GoroutineLeakFinderGC is enabled. if obj != 0 && obj-b >= n { // Test if obj points into the Go heap and, if so, // mark the object. diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index c41c3558359c0c..586abc433ffc0b 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. @@ -269,6 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, + _Gleaked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec66384a75fa1b..1d3f73099e8660 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -513,7 +513,7 @@ func acquireSudog() *sudog { s := pp.sudogcache[n-1] pp.sudogcache[n-1] = nil pp.sudogcache = pp.sudogcache[:n-1] - if s.elem != nil { + if s.elem.get() != nil { throw("acquireSudog: found s.elem != nil in cache") } releasem(mp) @@ -522,7 +522,7 @@ func acquireSudog() *sudog { //go:nosplit func releaseSudog(s *sudog) { - if s.elem != nil { + if s.elem.get() != nil { throw("runtime: sudog with non-nil elem") } if s.isSelect { @@ -537,7 +537,7 @@ func releaseSudog(s *sudog) { if s.waitlink != nil { throw("runtime: sudog with non-nil waitlink") } - if s.c != nil { + if s.c.get() != nil { throw("runtime: sudog with non-nil c") } gp := getg() @@ -1208,6 +1208,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, + _Gscanleaked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1228,6 +1229,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, + _Gleaked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 424745d2357dc9..0ebef6d6e2adb1 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -316,6 +316,7 @@ var debug struct { dontfreezetheworld int32 efence int32 gccheckmark int32 + gcgoroutineleaks int32 gcpacertrace int32 gcshrinkstackoff int32 gcstoptheworld int32 @@ -381,6 +382,7 @@ var dbgvars = []*dbgVar{ {name: "efence", value: &debug.efence}, {name: "gccheckmark", value: &debug.gccheckmark}, {name: "gcpacertrace", value: &debug.gcpacertrace}, + {name: "gcgoroutineleaks", value: &debug.gcgoroutineleaks}, {name: "gcshrinkstackoff", value: &debug.gcshrinkstackoff}, {name: "gcstoptheworld", value: &debug.gcstoptheworld}, {name: "gctrace", value: &debug.gctrace}, diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ee07c1ed930e4a..adb947a11d980b 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,6 +87,9 @@ const ( // ready()ing this G. _Gpreempted // 9 + // _Gleaked represents a leaked goroutine caught by the GC. + _Gleaked // 10 + // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. 
The // goroutine is not executing user code and the stack is owned @@ -104,6 +107,7 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( @@ -315,6 +319,78 @@ type gobuf struct { bp uintptr // for framepointer-enabled architectures } +// maybeTraceablePtr is a special pointer that is conditionally trackable +// by the GC. It consists of an address as a uintptr (vu) and a pointer +// to a data element (vp). +// +// maybeTraceablePtr values can be in one of three states: +// 1. Unset: vu == 0 && vp == nil +// 2. Untracked: vu != 0 && vp == nil +// 3. Tracked: vu != 0 && vp != nil +// +// Do not set fields manually. Use methods instead. +// Extend this type with additional methods if needed. +type maybeTraceablePtr struct { + vp unsafe.Pointer // For liveness only. + vu uintptr // Source of truth. +} + +// setUntraceable unsets the pointer but preserves the address. +// This is used to hide the pointer from the GC. +// +//go:nosplit +func (p *maybeTraceablePtr) setUntraceable() { + p.vp = nil +} + +// setTraceable resets the pointer to the stored address. +// This is used to make the pointer visible to the GC. +// +//go:nosplit +func (p *maybeTraceablePtr) setTraceable() { + p.vp = unsafe.Pointer(p.vu) +} + +// set sets the pointer to the data element and updates the address. +// +//go:nosplit +func (p *maybeTraceablePtr) set(v unsafe.Pointer) { + p.vp = v + p.vu = uintptr(v) +} + +// get retrieves the pointer to the data element. +// +//go:nosplit +func (p *maybeTraceablePtr) get() unsafe.Pointer { + return unsafe.Pointer(p.vu) +} + +// uintptr returns the uintptr address of the pointer. +// +//go:nosplit +func (p *maybeTraceablePtr) uintptr() uintptr { + return p.vu +} + +// maybeTraceableChan extends conditionally trackable pointers (maybeTraceablePtr) +// to track hchan pointers. +// +// Do not set fields manually. Use methods instead. +type maybeTraceableChan struct { + maybeTraceablePtr +} + +//go:nosplit +func (p *maybeTraceableChan) set(c *hchan) { + p.maybeTraceablePtr.set(unsafe.Pointer(c)) +} + +//go:nosplit +func (p *maybeTraceableChan) get() *hchan { + return (*hchan)(p.maybeTraceablePtr.get()) +} + // sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving // on a channel. // @@ -334,7 +410,8 @@ type sudog struct { next *sudog prev *sudog - elem unsafe.Pointer // data element (may point to stack) + + elem maybeTraceablePtr // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g. @@ -362,10 +439,10 @@ type sudog struct { // in the second entry in the list.)
waiters uint16 - parent *sudog // semaRoot binary tree - waitlink *sudog // g.waiting list or semaRoot - waittail *sudog // semaRoot - c *hchan // channel + parent *sudog // semaRoot binary tree + waitlink *sudog // g.waiting list or semaRoot + waittail *sudog // semaRoot + c maybeTraceableChan // channel } type libcall struct { @@ -1057,24 +1134,24 @@ const ( waitReasonZero waitReason = iota // "" waitReasonGCAssistMarking // "GC assist marking" waitReasonIOWait // "IO wait" - waitReasonChanReceiveNilChan // "chan receive (nil chan)" - waitReasonChanSendNilChan // "chan send (nil chan)" waitReasonDumpingHeap // "dumping heap" waitReasonGarbageCollection // "garbage collection" waitReasonGarbageCollectionScan // "garbage collection scan" waitReasonPanicWait // "panicwait" - waitReasonSelect // "select" - waitReasonSelectNoCases // "select (no cases)" waitReasonGCAssistWait // "GC assist wait" waitReasonGCSweepWait // "GC sweep wait" waitReasonGCScavengeWait // "GC scavenge wait" - waitReasonChanReceive // "chan receive" - waitReasonChanSend // "chan send" waitReasonFinalizerWait // "finalizer wait" waitReasonForceGCIdle // "force gc (idle)" waitReasonUpdateGOMAXPROCSIdle // "GOMAXPROCS updater (idle)" waitReasonSemacquire // "semacquire" waitReasonSleep // "sleep" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" waitReasonSyncCondWait // "sync.Cond.Wait" waitReasonSyncMutexLock // "sync.Mutex.Lock" waitReasonSyncRWMutexRLock // "sync.RWMutex.RLock" @@ -1160,12 +1237,24 @@ func (w waitReason) String() string { return waitReasonStrings[w] } +// isMutexWait returns true if the goroutine is blocked because of +// sync.Mutex.Lock or sync.RWMutex.[R]Lock. +// +//go:nosplit func (w waitReason) isMutexWait() bool { return w == waitReasonSyncMutexLock || w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } +// isSyncWait returns true if the goroutine is blocked because of +// sync library primitive operations. +// +//go:nosplit +func (w waitReason) isSyncWait() bool { + return waitReasonSyncCondWait <= w && w <= waitReasonSyncWaitGroupWait +} + func (w waitReason) isWaitingForSuspendG() bool { return isWaitingForSuspendG[w] } diff --git a/src/runtime/select.go b/src/runtime/select.go index ae7754b17377dd..d94a08c2651082 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -83,7 +83,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // channels in lock order. var lastc *hchan for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc && lastc != nil { + if sg.c.get() != lastc && lastc != nil { // As soon as we unlock the channel, fields in // any sudog with that channel may change, // including c and waitlink. Since multiple @@ -92,7 +92,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // of a channel. unlock(&lastc.lock) } - lastc = sg.c + lastc = sg.c.get() } if lastc != nil { unlock(&lastc.lock) @@ -320,12 +320,12 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo sg.isSelect = true // No stack splits between assigning elem and enqueuing // sg on gp.waiting where copystack can find it. - sg.elem = cas.elem + sg.elem.set(cas.elem) sg.releasetime = 0 if t0 != 0 { sg.releasetime = -1 } - sg.c = c + sg.c.set(c) // Construct waiting list in lock order. 
*nextp = sg nextp = &sg.waitlink @@ -368,8 +368,8 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo // Clear all elem before unlinking from gp.waiting. for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { sg1.isSelect = false - sg1.elem = nil - sg1.c = nil + sg1.elem.set(nil) + sg1.c.set(nil) } gp.waiting = nil diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 6af49b1b0c42d9..833829f70ed219 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -21,6 +21,7 @@ package runtime import ( "internal/cpu" + "internal/goexperiment" "internal/runtime/atomic" "unsafe" ) @@ -188,7 +189,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes i } // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. - root.queue(addr, s, lifo) + root.queue(addr, s, lifo, reason.isSyncWait()) goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes) if s.ticket != 0 || cansemacquire(addr) { break @@ -301,9 +302,16 @@ func cansemacquire(addr *uint32) bool { } // queue adds s to the blocked goroutines in semaRoot. -func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { +func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() - s.elem = unsafe.Pointer(addr) + s.elem.set(unsafe.Pointer(addr)) + if goexperiment.GoroutineLeakFinderGC && syncSema { + s.g.waiting = s + // When dealing with sync semaphores, hide the elem field from the GC + // to prevent it from prematurely marking the semaphore when running + // goroutine leak detection. + s.elem.setUntraceable() + } s.next = nil s.prev = nil s.waiters = 0 @@ -311,7 +319,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == unsafe.Pointer(addr) { + if uintptr(unsafe.Pointer(addr)) == t.elem.uintptr() { // Already have addr in list. if lifo { // Substitute s in t's place in treap. @@ -357,7 +365,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { return } last = t - if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) { + if uintptr(unsafe.Pointer(addr)) < t.elem.uintptr() { pt = &t.prev } else { pt = &t.next @@ -402,11 +410,13 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) { ps := &root.treap s := *ps + for ; s != nil; s = *ps { - if s.elem == unsafe.Pointer(addr) { + if uintptr(unsafe.Pointer(addr)) == s.elem.uintptr() { goto Found } - if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) { + + if uintptr(unsafe.Pointer(addr)) < s.elem.uintptr() { ps = &s.prev } else { ps = &s.next @@ -470,8 +480,12 @@ Found: } tailtime = s.acquiretime } + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear the waiting pointer. + s.g.waiting = nil + } s.parent = nil - s.elem = nil + s.elem.set(nil) s.next = nil s.prev = nil s.ticket = 0 @@ -590,6 +604,14 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() + if goexperiment.GoroutineLeakFinderGC { + // Storing this pointer (invisible to GC) so that we can trace + // the condvar address from the blocked goroutine when + // checking for goroutine leaks. 
+ s.elem.set(unsafe.Pointer(l)) + s.elem.setUntraceable() + s.g.waiting = s + } s.ticket = t s.releasetime = 0 t0 := int64(0) @@ -607,6 +629,12 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear up its waiting pointer, + // and clean up the sudog before releasing it. + s.g.waiting = nil + s.elem.set(nil) + } releaseSudog(s) } diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go index a5dc8aed3443bc..23adcfb93ea4fd 100644 --- a/src/runtime/sizeof_test.go +++ b/src/runtime/sizeof_test.go @@ -20,8 +20,8 @@ func TestSizeof(t *testing.T) { _32bit uintptr // size on 32bit platforms _64bit uintptr // size on 64bit platforms }{ - {runtime.G{}, 280, 440}, // g, but exported for testing - {runtime.Sudog{}, 56, 88}, // sudog, but exported for testing + {runtime.G{}, 280, 440}, // g, but exported for testing + {runtime.Sudog{}, 64, 104}, // sudog, but exported for testing } for _, tt := range tests { diff --git a/src/runtime/stack.go b/src/runtime/stack.go index a338708d76fca8..8c78965d372223 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -821,7 +821,8 @@ func adjustsudogs(gp *g, adjinfo *adjustinfo) { // the data elements pointed to by a SudoG structure // might be in the stack. for s := gp.waiting; s != nil; s = s.waitlink { - adjustpointer(adjinfo, unsafe.Pointer(&s.elem)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vu)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vp)) } } @@ -834,7 +835,7 @@ func fillstack(stk stack, b byte) { func findsghi(gp *g, stk stack) uintptr { var sghi uintptr for sg := gp.waiting; sg != nil; sg = sg.waitlink { - p := uintptr(sg.elem) + uintptr(sg.c.elemsize) + p := sg.elem.uintptr() + uintptr(sg.c.get().elemsize) if stk.lo <= p && p < stk.hi && p > sghi { sghi = p } @@ -853,7 +854,7 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // Lock channels to prevent concurrent send/receive. var lastc *hchan for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc { + if sg.c.get() != lastc { // There is a ranking cycle here between gscan bit and // hchan locks. Normally, we only allow acquiring hchan // locks and then getting a gscan bit. In this case, we @@ -863,9 +864,9 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // suspended. So, we get a special hchan lock rank here // that is lower than gscan, but doesn't allow acquiring // any other locks other than hchan. - lockWithRank(&sg.c.lock, lockRankHchanLeaf) + lockWithRank(&sg.c.get().lock, lockRankHchanLeaf) } - lastc = sg.c + lastc = sg.c.get() } // Adjust sudogs. @@ -885,10 +886,10 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // Unlock channels. 
lastc = nil for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc { - unlock(&sg.c.lock) + if sg.c.get() != lastc { + unlock(&sg.c.get().lock) } - lastc = sg.c + lastc = sg.c.get() } return sgsize diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go new file mode 100644 index 00000000000000..cc06c4b77541ca --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go @@ -0,0 +1,133 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10214 + * Buggy version: 7207111aa3a43df0552509365fdec741a53f873f + * fix commit-id: 27e863d90ab0660494778f1c35966cc5ddc38e32 + * Flaky: 3/100 + * Description: This deadlock is caused by different order when acquiring + * coalescedMu.Lock() and raftMu.Lock(). The fix is to refactor sendQueuedHeartbeats() + * so that cockroachdb can unlock coalescedMu before locking raftMu. + */ +package main + +import ( + "runtime" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach10214", Cockroach10214) +} + +type Store_cockroach10214 struct { + coalescedMu struct { + sync.Mutex + heartbeatResponses []int + } + mu struct { + replicas map[int]*Replica_cockroach10214 + } +} + +func (s *Store_cockroach10214) sendQueuedHeartbeats() { + s.coalescedMu.Lock() // LockA acquire + runtime.Gosched() + defer s.coalescedMu.Unlock() + for i := 0; i < len(s.coalescedMu.heartbeatResponses); i++ { + s.sendQueuedHeartbeatsToNode() // LockB + } + // LockA release +} + +func (s *Store_cockroach10214) sendQueuedHeartbeatsToNode() { + for i := 0; i < len(s.mu.replicas); i++ { + r := s.mu.replicas[i] + r.reportUnreachable() // LockB + } +} + +type Replica_cockroach10214 struct { + raftMu sync.Mutex + mu sync.Mutex + store *Store_cockroach10214 +} + +func (r *Replica_cockroach10214) reportUnreachable() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + //+time.Sleep(time.Nanosecond) + defer r.raftMu.Unlock() + // LockB release +} + +func (r *Replica_cockroach10214) tick() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + defer r.raftMu.Unlock() + r.tickRaftMuLocked() + // LockB release +} + +func (r *Replica_cockroach10214) tickRaftMuLocked() { + r.mu.Lock() + defer r.mu.Unlock() + if r.maybeQuiesceLocked() { + return + } +} +func (r *Replica_cockroach10214) maybeQuiesceLocked() bool { + for i := 0; i < 2; i++ { + if !r.maybeCoalesceHeartbeat() { + return true + } + } + return false +} +func (r *Replica_cockroach10214) maybeCoalesceHeartbeat() bool { + msgtype := uintptr(unsafe.Pointer(r)) % 3 + switch msgtype { + case 0, 1, 2: + r.store.coalescedMu.Lock() // LockA acquire + default: + return false + } + r.store.coalescedMu.Unlock() // LockA release + return true +} + +func Cockroach10214() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + store := &Store_cockroach10214{} + responses := &store.coalescedMu.heartbeatResponses + *responses = append(*responses, 1, 2) + store.mu.replicas = make(map[int]*Replica_cockroach10214) + + rp1 := &Replica_cockroach10214{ + store: store, + } + rp2 := &Replica_cockroach10214{ + store: store, + } + store.mu.replicas[0] = rp1 + store.mu.replicas[1] = rp2 + + go func() { + // deadlocks: x > 0 + store.sendQueuedHeartbeats() + }() + + go func() { + // deadlocks: x > 0 + rp1.tick() + }() + + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go 
b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go new file mode 100644 index 00000000000000..f479a29dc36855 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go @@ -0,0 +1,148 @@ +package main + +import ( + "runtime" + "sync" + "sync/atomic" + "time" +) + +func init() { + register("Cockroach1055", Cockroach1055) +} + +type Stopper_cockroach1055 struct { + stopper chan struct{} + stop sync.WaitGroup + mu sync.Mutex + draining int32 + drain sync.WaitGroup +} + +func (s *Stopper_cockroach1055) AddWorker() { + s.stop.Add(1) +} + +func (s *Stopper_cockroach1055) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1055) SetStopped() { + if s != nil { + s.stop.Done() + } +} + +func (s *Stopper_cockroach1055) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + s.draining = 1 + s.drain.Wait() + s.draining = 0 +} + +func (s *Stopper_cockroach1055) Stop() { + s.mu.Lock() // L1 + defer s.mu.Unlock() + atomic.StoreInt32(&s.draining, 1) + s.drain.Wait() + close(s.stopper) + s.stop.Wait() +} + +func (s *Stopper_cockroach1055) StartTask() bool { + if atomic.LoadInt32(&s.draining) == 0 { + s.mu.Lock() + defer s.mu.Unlock() + s.drain.Add(1) + return true + } + return false +} + +func NewStopper_cockroach1055() *Stopper_cockroach1055 { + return &Stopper_cockroach1055{ + stopper: make(chan struct{}), + } +} + +func Cockroach1055() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + var stoppers []*Stopper_cockroach1055 + for i := 0; i < 2; i++ { + stoppers = append(stoppers, NewStopper_cockroach1055()) + } + + for i := range stoppers { + s := stoppers[i] + s.AddWorker() + go func() { // G2 + // deadlocks: x > 0 + s.StartTask() + <-s.ShouldStop() + s.SetStopped() + }() + } + + done := make(chan struct{}) + go func() { // G3 + // deadlocks: x > 0 + for _, s := range stoppers { + s.Quiesce() + } + for _, s := range stoppers { + s.Stop() + } + close(done) + }() + + <-done + }() + } +} + +// Example deadlock trace: +// +// G1 G2.0 G2.1 G2.2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// s[0].stop.Add(1) [1] +// go func() [G2.0] +// s[1].stop.Add(1) [1] . +// go func() [G2.1] . +// s[2].stop.Add(1) [1] . . +// go func() [G2.2] . . +// go func() [G3] . . . +// <-done . . . . +// . s[0].StartTask() . . . +// . s[0].draining == 0 . . . +// . . s[1].StartTask() . . +// . . s[1].draining == 0 . . +// . . . s[2].StartTask() . +// . . . s[2].draining == 0 . +// . . . . s[0].Quiesce() +// . . . . s[0].mu.Lock() [L1[0]] +// . s[0].mu.Lock() [L1[0]] . . . +// . s[0].drain.Add(1) [1] . . . +// . s[0].mu.Unlock() [L1[0]] . . . +// . <-s[0].ShouldStop() . . . +// . . . . s[0].draining = 1 +// . . . . s[0].drain.Wait() +// . . s[0].mu.Lock() [L1[1]] . . +// . . s[1].drain.Add(1) [1] . . +// . . s[1].mu.Unlock() [L1[1]] . . +// . . <-s[1].ShouldStop() . . +// . . . s[2].mu.Lock() [L1[2]] . +// . . . s[2].drain.Add() [1] . +// . . . s[2].mu.Unlock() [L1[2]] . +// . . . <-s[2].ShouldStop() . 
+// ----------------------------------------------------G1, G2.[0..2], G3 leak------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go new file mode 100644 index 00000000000000..57c0d0c81bb4d4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go @@ -0,0 +1,125 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10790 + * Buggy version: 96b5452557ebe26bd9d85fe7905155009204d893 + * fix commit-id: f1a5c19125c65129b966fbdc0e6408e8df214aba + * Flaky: 28/100 + * Description: + * It is possible that a message from ctxDone will make the function beginCmds + * return without draining the channel ch, so that goroutines created by the anonymous + * function will leak. + */ + +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach10790", Cockroach10790) +} + +type Stopper_cockroach10790 struct { + quiescer chan struct{} + mu struct { + sync.Mutex + quiescing bool + } +} + +func (s *Stopper_cockroach10790) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func (s *Stopper_cockroach10790) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + if !s.mu.quiescing { + s.mu.quiescing = true + close(s.quiescer) + } +} + +func (s *Stopper_cockroach10790) Stop() { + s.Quiesce() +} + +type Replica_cockroach10790 struct { + chans []chan bool + stopper *Stopper_cockroach10790 +} + +func (r *Replica_cockroach10790) beginCmds(ctx context.Context) { + ctxDone := ctx.Done() + for _, ch := range r.chans { + select { + case <-ch: + case <-ctxDone: + go func() { + // deadlocks: x > 0 + for _, ch := range r.chans { + <-ch + } + }() + } + } +} + +func (r *Replica_cockroach10790) sendChans(ctx context.Context) { + for _, ch := range r.chans { + select { + case ch <- true: + case <-ctx.Done(): + return + } + } +} + +func NewReplica_cockroach10790() *Replica_cockroach10790 { + r := &Replica_cockroach10790{ + stopper: &Stopper_cockroach10790{ + quiescer: make(chan struct{}), + }, + } + r.chans = append(r.chans, make(chan bool)) + r.chans = append(r.chans, make(chan bool)) + return r +} + +/// +/// G1 G2 helper goroutine +/// r.sendChans() +/// r.beginCmds() +/// ch1 <- true +/// <- ch1 +/// ch2 <- true +/// ... ... ... +/// cancel() +/// <- ch1 +/// ------------------G1 leak-------------------------- +/// + +func Cockroach10790() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + r := NewReplica_cockroach10790() + ctx, cancel := context.WithCancel(context.Background()) + go r.sendChans(ctx) // helper goroutine + go r.beginCmds(ctx) // G1 + go cancel() // G2 + r.stopper.Stop() + }() + } +}
diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go new file mode 100644 index 00000000000000..1745df4dbc66a0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -0,0 +1,71 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13197 + * Buggy version: fff27aedabafe20cef57f75905fe340cab48c2a4 + * fix commit-id: 9bf770cd8f6eaff5441b80d3aec1a5614e8747e1 + * Flaky: 100/100 + * Description: One goroutine executing (*Tx).awaitDone() blocks + * waiting for a signal from context.Done().
+ */ +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach13197", Cockroach13197) +} + +type DB_cockroach13197 struct{} + +func (db *DB_cockroach13197) begin(ctx context.Context) *Tx_cockroach13197 { + ctx, cancel := context.WithCancel(ctx) + tx := &Tx_cockroach13197{ + cancel: cancel, + ctx: ctx, + } + // deadlocks: 1 + go tx.awaitDone() // G2 + return tx +} + +type Tx_cockroach13197 struct { + cancel context.CancelFunc + ctx context.Context +} + +func (tx *Tx_cockroach13197) awaitDone() { + <-tx.ctx.Done() +} + +func (tx *Tx_cockroach13197) Rollback() { + tx.rollback() +} + +func (tx *Tx_cockroach13197) rollback() { + tx.close() +} + +func (tx *Tx_cockroach13197) close() { + tx.cancel() +} + +/// G1 G2 +/// begin() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// -----------G2 leak------------- + +func Cockroach13197() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + db := &DB_cockroach13197{} + db.begin(context.Background()) // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go new file mode 100644 index 00000000000000..690a5586f2df32 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -0,0 +1,58 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13755 + * Buggy version: 7acb881bbb8f23e87b69fce9568d9a3316b5259c + * fix commit-id: ef906076adc1d0e3721944829cfedfed51810088 + * Flaky: 100/100 + * Description: The buggy code does not close the db query result (rows), + * so that one goroutine running (*Rows).awaitDone is blocked forever. + * The blocking goroutine is waiting for cancel signal from context. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach13755", Cockroach13755) +} + +type Rows_cockroach13755 struct { + cancel context.CancelFunc +} + +func (rs *Rows_cockroach13755) initContextClose(ctx context.Context) { + ctx, rs.cancel = context.WithCancel(ctx) + // deadlocks: 1 + go rs.awaitDone(ctx) +} + +func (rs *Rows_cockroach13755) awaitDone(ctx context.Context) { + <-ctx.Done() + rs.close(ctx.Err()) +} + +func (rs *Rows_cockroach13755) close(err error) { + rs.cancel() +} + +/// G1 G2 +/// initContextClose() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// ---------------G2 leak----------------- + +func Cockroach13755() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + rs := &Rows_cockroach13755{} + rs.initContextClose(context.Background()) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go new file mode 100644 index 00000000000000..b2365a8b95efdc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go @@ -0,0 +1,184 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach1462", Cockroach1462) +} + +type Stopper_cockroach1462 struct { + stopper chan struct{} + stopped chan struct{} + stop sync.WaitGroup + mu sync.Mutex + drain *sync.Cond + draining bool + numTasks int +} + +func NewStopper_cockroach1462() *Stopper_cockroach1462 { + s := &Stopper_cockroach1462{ + stopper: make(chan struct{}), + stopped: make(chan struct{}), + } + s.drain = sync.NewCond(&s.mu) + return s +} + +func (s *Stopper_cockroach1462) RunWorker(f func()) { + s.AddWorker() + go func() { // G2, G3 + defer s.SetStopped() + // 
deadlocks: x > 0 + f() + }() +} + +func (s *Stopper_cockroach1462) AddWorker() { + s.stop.Add(1) +} +func (s *Stopper_cockroach1462) StartTask() bool { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + if s.draining { + return false + } + s.numTasks++ + return true +} + +func (s *Stopper_cockroach1462) FinishTask() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.numTasks-- + s.drain.Broadcast() +} +func (s *Stopper_cockroach1462) SetStopped() { + if s != nil { + s.stop.Done() + } +} +func (s *Stopper_cockroach1462) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1462) Quiesce() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.draining = true + for s.numTasks > 0 { + // Unlock s.mu, wait for the signal, and lock s.mu. + s.drain.Wait() + } +} + +func (s *Stopper_cockroach1462) Stop() { + s.Quiesce() + close(s.stopper) + s.stop.Wait() + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + close(s.stopped) +} + +type interceptMessage_cockroach1462 int + +type localInterceptableTransport_cockroach1462 struct { + mu sync.Mutex + Events chan interceptMessage_cockroach1462 + stopper *Stopper_cockroach1462 +} + +func (lt *localInterceptableTransport_cockroach1462) Close() {} + +type Transport_cockroach1462 interface { + Close() +} + +func NewLocalInterceptableTransport_cockroach1462(stopper *Stopper_cockroach1462) Transport_cockroach1462 { + lt := &localInterceptableTransport_cockroach1462{ + Events: make(chan interceptMessage_cockroach1462), + stopper: stopper, + } + lt.start() + return lt +} + +func (lt *localInterceptableTransport_cockroach1462) start() { + lt.stopper.RunWorker(func() { + for { + select { + case <-lt.stopper.ShouldStop(): + return + default: + lt.Events <- interceptMessage_cockroach1462(0) + } + } + }) +} + +func processEventsUntil_cockroach1462(ch <-chan interceptMessage_cockroach1462, stopper *Stopper_cockroach1462) { + for { + select { + case _, ok := <-ch: + runtime.Gosched() + if !ok { + return + } + case <-stopper.ShouldStop(): + return + } + } +} + +func Cockroach1462() { + defer func() { + time.Sleep(2000 * time.Millisecond) + runtime.GC() + }() + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + stopper := NewStopper_cockroach1462() + transport := NewLocalInterceptableTransport_cockroach1462(stopper).(*localInterceptableTransport_cockroach1462) + stopper.RunWorker(func() { + processEventsUntil_cockroach1462(transport.Events, stopper) + }) + stopper.Stop() + }() + } +} + +// Example of a deadlocking trace +// G1 G2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// NewLocalInterceptableTransport() +// lt.start() +// lt.stopper.RunWorker() +// s.AddWorker() +// s.stop.Add(1) [1] +// go func() [G2] +// stopper.RunWorker() . +// s.AddWorker() . +// s.stop.Add(1) [2] . +// go func() [G3] . +// s.Stop() . . +// s.Quiesce() . . +// . select [default] . +// . lt.Events <- interceptMessage(0) . +// close(s.stopper) . . +// . . select [<-stopper.ShouldStop()] +// . . <<>> +// s.stop.Wait() . 
+// -----------------------------------------------------G1,G2 leak------------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go new file mode 100644 index 00000000000000..7860b74b437850 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go @@ -0,0 +1,121 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/16167 + * Buggy version: 36fa784aa846b46c29e077634c4e362635f6e74a + * fix commit-id: d064942b067ab84628f79cbfda001fa3138d8d6e + * Flaky: 1/100 + * Description: + * This is another example of a deadlock caused by recursively + * acquiring an RWLock. There are two lock variables (systemConfigCond and systemConfigMu) + * involved in this bug, but they are actually the same lock, as can be seen in + * the following code. + * Two goroutines are involved in this deadlock. The first goroutine acquires + * systemConfigMu.Lock() first, then tries to acquire systemConfigMu.RLock(). The + * second goroutine tries to acquire systemConfigMu.Lock(). If the second goroutine + * interleaves between the two lock operations of the first goroutine, a deadlock occurs. + */ + +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach16167", Cockroach16167) +} + +type PreparedStatements_cockroach16167 struct { + session *Session_cockroach16167 +} + +func (ps PreparedStatements_cockroach16167) New(e *Executor_cockroach16167) { + e.Prepare(ps.session) +} + +type Session_cockroach16167 struct { + PreparedStatements PreparedStatements_cockroach16167 +} + +func (s *Session_cockroach16167) resetForBatch(e *Executor_cockroach16167) { + e.getDatabaseCache() +} + +type Executor_cockroach16167 struct { + systemConfigCond *sync.Cond + systemConfigMu sync.RWMutex +} + +func (e *Executor_cockroach16167) Start() { + e.updateSystemConfig() +} + +func (e *Executor_cockroach16167) execParsed(session *Session_cockroach16167) { + e.systemConfigCond.L.Lock() // Same as e.systemConfigMu.RLock() + runtime.Gosched() + defer e.systemConfigCond.L.Unlock() + runTxnAttempt_cockroach16167(e, session) +} + +func (e *Executor_cockroach16167) execStmtsInCurrentTxn(session *Session_cockroach16167) { + e.execStmtInOpenTxn(session) +} + +func (e *Executor_cockroach16167) execStmtInOpenTxn(session *Session_cockroach16167) { + session.PreparedStatements.New(e) +} + +func (e *Executor_cockroach16167) Prepare(session *Session_cockroach16167) { + session.resetForBatch(e) +} + +func (e *Executor_cockroach16167) getDatabaseCache() { + e.systemConfigMu.RLock() + defer e.systemConfigMu.RUnlock() +} + +func (e *Executor_cockroach16167) updateSystemConfig() { + e.systemConfigMu.Lock() + runtime.Gosched() + defer e.systemConfigMu.Unlock() +} + +func runTxnAttempt_cockroach16167(e *Executor_cockroach16167, session *Session_cockroach16167) { + e.execStmtsInCurrentTxn(session) +} + +func NewExectorAndSession_cockroach16167() (*Executor_cockroach16167, *Session_cockroach16167) { + session := &Session_cockroach16167{} + session.PreparedStatements = PreparedStatements_cockroach16167{session} + e := &Executor_cockroach16167{} + return e, session +} + +/// G1 G2 +/// e.Start() +/// e.updateSystemConfig() +/// e.execParsed() +/// e.systemConfigCond.L.Lock() +/// e.systemConfigMu.Lock() +/// e.systemConfigMu.RLock() +/// ----------------------G1,G2 deadlock-------------------- + +func Cockroach16167() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + e, s := NewExectorAndSession_cockroach16167() + e.systemConfigCond = sync.NewCond(e.systemConfigMu.RLocker()) + // deadlocks: x > 0 + go e.Start() // G1 + e.execParsed(s) // G2 + }() + } +}
diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go new file mode 100644 index 00000000000000..e85007a675d1e4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go @@ -0,0 +1,73 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/18101 + * Buggy version: f7a8e2f57b6bcf00b9abaf3da00598e4acd3a57f + * fix commit-id: 822bd176cc725c6b50905ea615023200b395e14f + * Flaky: 100/100 + * Description: + * The context.Done() signal only stops the goroutine that pulls data + * from the channel; it does not stop the goroutines that send data + * to the channel. This causes all goroutines trying to send data + * through the channel to block. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Cockroach18101", Cockroach18101) +} + +const chanSize_cockroach18101 = 6 + +func restore_cockroach18101(ctx context.Context) bool { + readyForImportCh := make(chan bool, chanSize_cockroach18101) + go func() { // G2 + defer close(readyForImportCh) + // deadlocks: x > 0 + splitAndScatter_cockroach18101(ctx, readyForImportCh) + }() + for readyForImportSpan := range readyForImportCh { + select { + case <-ctx.Done(): + return readyForImportSpan + } + } + return true +} + +func splitAndScatter_cockroach18101(ctx context.Context, readyForImportCh chan bool) { + for i := 0; i < chanSize_cockroach18101+2; i++ { + readyForImportCh <- (false || i != 0) + } +} + +/// +/// G1 G2 helper goroutine +/// restore() +/// splitAndScatter() +/// <-readyForImportCh +/// readyForImportCh<- +/// ... ...
+/// cancel() +/// return +/// readyForImportCh<- +/// -----------------------G2 leak------------------------- +/// + +func Cockroach18101() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + ctx, cancel := context.WithCancel(context.Background()) + go restore_cockroach18101(ctx) // G1 + go cancel() // helper goroutine + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go new file mode 100644 index 00000000000000..7ab60bd11b2a60 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go @@ -0,0 +1,136 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Cockroach2448", Cockroach2448) +} + +type Stopper_cockroach2448 struct { + Done chan bool +} + +func (s *Stopper_cockroach2448) ShouldStop() <-chan bool { + return s.Done +} + +type EventMembershipChangeCommitted_cockroach2448 struct { + Callback func() +} +type MultiRaft_cockroach2448 struct { + stopper *Stopper_cockroach2448 + Events chan interface{} + callbackChan chan func() +} + +// sendEvent can be invoked many times +func (m *MultiRaft_cockroach2448) sendEvent(event interface{}) { + select { + case m.Events <- event: // Waiting for events consumption + case <-m.stopper.ShouldStop(): + } +} + +type state_cockroach2448 struct { + *MultiRaft_cockroach2448 +} + +func (s *state_cockroach2448) start() { + for { + select { + case <-s.stopper.ShouldStop(): + return + case cb := <-s.callbackChan: + cb() + default: + s.handleWriteResponse() + time.Sleep(time.Millisecond) + } + } +} + +func (s *state_cockroach2448) handleWriteResponse() { + s.sendEvent(&EventMembershipChangeCommitted_cockroach2448{ + Callback: func() { + select { + case s.callbackChan <- func() { // Waiting for callbackChan consumption + time.Sleep(time.Nanosecond) + }: + case <-s.stopper.ShouldStop(): + } + }, + }) +} + +type Store_cockroach2448 struct { + multiraft *MultiRaft_cockroach2448 +} + +func (s *Store_cockroach2448) processRaft() { + for { + select { + case e := <-s.multiraft.Events: + switch e := e.(type) { + case *EventMembershipChangeCommitted_cockroach2448: + callback := e.Callback + runtime.Gosched() + if callback != nil { + callback() // Waiting for callbackChan consumption + } + } + case <-s.multiraft.stopper.ShouldStop(): + return + } + } +} + +func NewStoreAndState_cockroach2448() (*Store_cockroach2448, *state_cockroach2448) { + stopper := &Stopper_cockroach2448{ + Done: make(chan bool), + } + mltrft := &MultiRaft_cockroach2448{ + stopper: stopper, + Events: make(chan interface{}), + callbackChan: make(chan func()), + } + st := &state_cockroach2448{mltrft} + s := &Store_cockroach2448{mltrft} + return s, st +} + +func Cockroach2448() { + defer func() { + time.Sleep(time.Second) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + s, st := NewStoreAndState_cockroach2448() + // deadlocks: x > 0 + go s.processRaft() // G1 + // deadlocks: x > 0 + go st.start() // G2 + }() + } +} + +// Example of deadlock trace: +// +// G1 G2 +// -------------------------------------------------------------------------------------------------- +// s.processRaft() st.start() +// select . +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . select +// <-s.multiraft.Events <----> m.Events <- event +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . 
select [m.Events<-, <-s.stopper.ShouldStop()] +// callback() +// select [m.callbackChan<-,<-s.stopper.ShouldStop()] . diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go new file mode 100644 index 00000000000000..b16d4db25dd1a7 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -0,0 +1,81 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach24808", Cockroach24808) +} + +type Compactor_cockroach24808 struct { + ch chan struct{} +} + +type Stopper_cockroach24808 struct { + stop sync.WaitGroup + stopper chan struct{} +} + +func (s *Stopper_cockroach24808) RunWorker(ctx context.Context, f func(context.Context)) { + s.stop.Add(1) + go func() { + defer s.stop.Done() + f(ctx) + }() +} + +func (s *Stopper_cockroach24808) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach24808) Stop() { + close(s.stopper) +} + +func NewStopper_cockroach24808() *Stopper_cockroach24808 { + s := &Stopper_cockroach24808{ + stopper: make(chan struct{}), + } + return s +} + +func NewCompactor_cockroach24808() *Compactor_cockroach24808 { + return &Compactor_cockroach24808{ch: make(chan struct{}, 1)} +} + +func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_cockroach24808) { + c.ch <- struct{}{} + stopper.RunWorker(ctx, func(ctx context.Context) { + for { + select { + case <-stopper.ShouldStop(): + return + case <-c.ch: + } + } + }) +} + +func Cockroach24808() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach24808() + defer stopper.Stop() + + compactor := NewCompactor_cockroach24808() + compactor.ch <- struct{}{} + + compactor.Start(context.Background(), stopper) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go new file mode 100644 index 00000000000000..961c2fe7c065f3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -0,0 +1,91 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Cockroach25456", Cockroach25456) +} + +type Stopper_cockroach25456 struct { + quiescer chan struct{} +} + +func (s *Stopper_cockroach25456) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func NewStopper_cockroach25456() *Stopper_cockroach25456 { + return &Stopper_cockroach25456{quiescer: make(chan struct{})} +} + +type Store_cockroach25456 struct { + stopper *Stopper_cockroach25456 + consistencyQueue *consistencyQueue_cockroach25456 +} + +func (s *Store_cockroach25456) Stopper() *Stopper_cockroach25456 { + return s.stopper +} +func (s *Store_cockroach25456) Start(stopper *Stopper_cockroach25456) { + s.stopper = stopper +} + +func NewStore_cockroach25456() *Store_cockroach25456 { + return &Store_cockroach25456{ + consistencyQueue: newConsistencyQueue_cockroach25456(), + } +} + +type Replica_cockroach25456 struct { + store *Store_cockroach25456 +} + +func NewReplica_cockroach25456(store *Store_cockroach25456) *Replica_cockroach25456 { + return &Replica_cockroach25456{store: store} +} + +type consistencyQueue_cockroach25456 struct{} + +func (q *consistencyQueue_cockroach25456) process(repl *Replica_cockroach25456) { + <-repl.store.Stopper().ShouldQuiesce() +} + +func newConsistencyQueue_cockroach25456() 
*consistencyQueue_cockroach25456 { + return &consistencyQueue_cockroach25456{} +} + +type testContext_cockroach25456 struct { + store *Store_cockroach25456 + repl *Replica_cockroach25456 +} + +func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cockroach25456) { + if tc.store == nil { + tc.store = NewStore_cockroach25456() + } + tc.store.Start(stopper) + tc.repl = NewReplica_cockroach25456(tc.store) +} + +func Cockroach25456() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach25456() + tc := testContext_cockroach25456{} + tc.StartWithStoreConfig(stopper) + + for i := 0; i < 2; i++ { + tc.store.consistencyQueue.process(tc.repl) + } + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go new file mode 100644 index 00000000000000..9ca074f83f279a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -0,0 +1,117 @@ +package main + +import ( + "runtime" + "sync" + "sync/atomic" + "time" +) + +func init() { + register("Cockroach35073", Cockroach35073) +} + +type ConsumerStatus_cockroach35073 uint32 + +const ( + NeedMoreRows_cockroach35073 ConsumerStatus_cockroach35073 = iota + DrainRequested_cockroach35073 + ConsumerClosed_cockroach35073 +) + +const rowChannelBufSize_cockroach35073 = 16 +const outboxBufRows_cockroach35073 = 16 + +type rowSourceBase_cockroach35073 struct { + consumerStatus ConsumerStatus_cockroach35073 +} + +func (rb *rowSourceBase_cockroach35073) consumerClosed() { + atomic.StoreUint32((*uint32)(&rb.consumerStatus), uint32(ConsumerClosed_cockroach35073)) +} + +type RowChannelMsg_cockroach35073 int + +type RowChannel_cockroach35073 struct { + rowSourceBase_cockroach35073 + dataChan chan RowChannelMsg_cockroach35073 +} + +func (rc *RowChannel_cockroach35073) ConsumerClosed() { + rc.consumerClosed() + select { + case <-rc.dataChan: + default: + } +} + +func (rc *RowChannel_cockroach35073) Push() ConsumerStatus_cockroach35073 { + consumerStatus := ConsumerStatus_cockroach35073( + atomic.LoadUint32((*uint32)(&rc.consumerStatus))) + switch consumerStatus { + case NeedMoreRows_cockroach35073: + rc.dataChan <- RowChannelMsg_cockroach35073(0) + case DrainRequested_cockroach35073: + case ConsumerClosed_cockroach35073: + } + return consumerStatus +} + +func (rc *RowChannel_cockroach35073) InitWithNumSenders() { + rc.initWithBufSizeAndNumSenders(rowChannelBufSize_cockroach35073) +} + +func (rc *RowChannel_cockroach35073) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan RowChannelMsg_cockroach35073, chanBufSize) +} + +type outbox_cockroach35073 struct { + RowChannel_cockroach35073 +} + +func (m *outbox_cockroach35073) init() { + m.RowChannel_cockroach35073.InitWithNumSenders() +} + +func (m *outbox_cockroach35073) start(wg *sync.WaitGroup) { + if wg != nil { + wg.Add(1) + } + go m.run(wg) +} + +func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { + if wg != nil { + wg.Done() + } +} + +func Cockroach35073() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + outbox := &outbox_cockroach35073{} + outbox.init() + + var wg sync.WaitGroup + for i := 0; i < outboxBufRows_cockroach35073; i++ { + outbox.Push() + } + + var blockedPusherWg sync.WaitGroup + blockedPusherWg.Add(1) + go func() { + // deadlocks: 1 + outbox.Push() + blockedPusherWg.Done() + }() + + outbox.start(&wg) + + 
wg.Wait() + outbox.RowChannel_cockroach35073.Push() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go new file mode 100644 index 00000000000000..587c1dfc58f4bc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -0,0 +1,127 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach35931", Cockroach35931) +} + +type RowReceiver_cockroach35931 interface { + Push() +} + +type inboundStreamInfo_cockroach35931 struct { + receiver RowReceiver_cockroach35931 +} + +type RowChannel_cockroach35931 struct { + dataChan chan struct{} +} + +func (rc *RowChannel_cockroach35931) Push() { + // The buffer size can be either 0 or 1 when this function is entered. + // We need context sensitivity or a path-condition on the buffer size + // to find this bug. + rc.dataChan <- struct{}{} +} + +func (rc *RowChannel_cockroach35931) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan struct{}, chanBufSize) +} + +type flowEntry_cockroach35931 struct { + flow *Flow_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +type flowRegistry_cockroach35931 struct { + sync.Mutex + flows map[int]*flowEntry_cockroach35931 +} + +func (fr *flowRegistry_cockroach35931) getEntryLocked(id int) *flowEntry_cockroach35931 { + entry, ok := fr.flows[id] + if !ok { + entry = &flowEntry_cockroach35931{} + fr.flows[id] = entry + } + return entry +} + +func (fr *flowRegistry_cockroach35931) cancelPendingStreamsLocked(id int) []RowReceiver_cockroach35931 { + entry := fr.flows[id] + pendingReceivers := make([]RowReceiver_cockroach35931, 0) + for _, is := range entry.inboundStreams { + pendingReceivers = append(pendingReceivers, is.receiver) + } + return pendingReceivers +} + +type Flow_cockroach35931 struct { + id int + flowRegistry *flowRegistry_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +func (f *Flow_cockroach35931) cancel() { + f.flowRegistry.Lock() + timedOutReceivers := f.flowRegistry.cancelPendingStreamsLocked(f.id) + f.flowRegistry.Unlock() + + for _, receiver := range timedOutReceivers { + receiver.Push() + } +} + +func (fr *flowRegistry_cockroach35931) RegisterFlow(f *Flow_cockroach35931, inboundStreams map[int]*inboundStreamInfo_cockroach35931) { + entry := fr.getEntryLocked(f.id) + entry.flow = f + entry.inboundStreams = inboundStreams +} + +func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { + return &flowRegistry_cockroach35931{ + flows: make(map[int]*flowEntry_cockroach35931), + } +} + +func Cockroach35931() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + fr := makeFlowRegistry_cockroach35931() + + left := &RowChannel_cockroach35931{} + left.initWithBufSizeAndNumSenders(1) + right := &RowChannel_cockroach35931{} + right.initWithBufSizeAndNumSenders(1) + + inboundStreams := map[int]*inboundStreamInfo_cockroach35931{ + 0: { + receiver: left, + }, + 1: { + receiver: right, + }, + } + + left.Push() + + flow := &Flow_cockroach35931{ + id: 0, + flowRegistry: fr, + inboundStreams: inboundStreams, + } + + fr.RegisterFlow(flow, inboundStreams) + + flow.cancel() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go new file mode 100644 index 00000000000000..c1fe8abec02678 --- /dev/null +++ 
b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go @@ -0,0 +1,132 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/3710 + * Buggy version: 4afdd4860fd7c3bd9e92489f84a95e5cc7d11a0d + * fix commit-id: cb65190f9caaf464723e7d072b1f1b69a044ef7b + * Flaky: 2/100 + * Description: This deadlock is casued by acquiring a RLock twice in a call chain. + * ForceRaftLogScanAndProcess(acquire s.mu.RLock()) ->MaybeAdd()->shouldQueue()-> + * getTruncatableIndexes()->RaftStatus(acquire s.mu.Rlock()) + */ + +package main + +import ( + "runtime" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach3710", Cockroach3710) +} + +type Store_cockroach3710 struct { + raftLogQueue *baseQueue + replicas map[int]*Replica_cockroach3710 + + mu struct { + sync.RWMutex + } +} + +func (s *Store_cockroach3710) ForceRaftLogScanAndProcess() { + s.mu.RLock() + runtime.Gosched() + for _, r := range s.replicas { + s.raftLogQueue.MaybeAdd(r) + } + s.mu.RUnlock() +} + +func (s *Store_cockroach3710) RaftStatus() { + s.mu.RLock() + defer s.mu.RUnlock() +} + +func (s *Store_cockroach3710) processRaft() { + go func() { + // deadlocks: x > 0 + for { + var replicas []*Replica_cockroach3710 + s.mu.Lock() + for _, r := range s.replicas { + replicas = append(replicas, r) + } + s.mu.Unlock() + break + } + }() +} + +type Replica_cockroach3710 struct { + store *Store_cockroach3710 +} + +type baseQueue struct { + sync.Mutex + impl *raftLogQueue +} + +func (bq *baseQueue) MaybeAdd(repl *Replica_cockroach3710) { + bq.Lock() + defer bq.Unlock() + bq.impl.shouldQueue(repl) +} + +type raftLogQueue struct{} + +func (*raftLogQueue) shouldQueue(r *Replica_cockroach3710) { + getTruncatableIndexes(r) +} + +func getTruncatableIndexes(r *Replica_cockroach3710) { + r.store.RaftStatus() +} + +func NewStore_cockroach3710() *Store_cockroach3710 { + rlq := &raftLogQueue{} + bq := &baseQueue{impl: rlq} + store := &Store_cockroach3710{ + raftLogQueue: bq, + replicas: make(map[int]*Replica_cockroach3710), + } + r1 := &Replica_cockroach3710{store} + r2 := &Replica_cockroach3710{store} + + makeKey := func(r *Replica_cockroach3710) int { + return int((uintptr(unsafe.Pointer(r)) >> 1) % 7) + } + store.replicas[makeKey(r1)] = r1 + store.replicas[makeKey(r2)] = r2 + + return store +} + +/// G1 G2 +/// store.ForceRaftLogScanAndProcess() +/// s.mu.RLock() +/// s.raftLogQueue.MaybeAdd() +/// bq.impl.shouldQueue() +/// getTruncatableIndexes() +/// r.store.RaftStatus() +/// store.processRaft() +/// s.mu.Lock() +/// s.mu.RLock() +/// ----------------------G1,G2 deadlock--------------------- + +func Cockroach3710() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 10000; i++ { + go func() { + store := NewStore_cockroach3710() + // deadlocks: x > 0 + go store.ForceRaftLogScanAndProcess() // G1 + go store.processRaft() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go new file mode 100644 index 00000000000000..89602804ca9727 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -0,0 +1,58 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach584", Cockroach584) +} + +type gossip_cockroach584 struct { + mu sync.Mutex + closed bool +} + +func (g *gossip_cockroach584) bootstrap() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func (g 
*gossip_cockroach584) manage() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func Cockroach584() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + g := &gossip_cockroach584{ + closed: true, + } + go func() { + // deadlocks: x > 0 + g.bootstrap() + g.manage() + }() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go new file mode 100644 index 00000000000000..20c440678e76a3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go @@ -0,0 +1,100 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/6181 + * Buggy version: c0a232b5521565904b851699853bdbd0c670cf1e + * fix commit-id: d5814e4886a776bf7789b3c51b31f5206480d184 + * Flaky: 57/100 + */ +package main + +import ( + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach6181", Cockroach6181) +} + +type testDescriptorDB_cockroach6181 struct { + cache *rangeDescriptorCache_cockroach6181 +} + +func initTestDescriptorDB_cockroach6181() *testDescriptorDB_cockroach6181 { + return &testDescriptorDB_cockroach6181{&rangeDescriptorCache_cockroach6181{}} +} + +type rangeDescriptorCache_cockroach6181 struct { + rangeCacheMu sync.RWMutex +} + +func (rdc *rangeDescriptorCache_cockroach6181) LookupRangeDescriptor() { + rdc.rangeCacheMu.RLock() + runtime.Gosched() + fmt.Println("lookup range descriptor:", rdc) + rdc.rangeCacheMu.RUnlock() + rdc.rangeCacheMu.Lock() + rdc.rangeCacheMu.Unlock() +} + +func (rdc *rangeDescriptorCache_cockroach6181) String() string { + rdc.rangeCacheMu.RLock() + defer rdc.rangeCacheMu.RUnlock() + return rdc.stringLocked() +} + +func (rdc *rangeDescriptorCache_cockroach6181) stringLocked() string { + return "something here" +} + +func doLookupWithToken_cockroach6181(rc *rangeDescriptorCache_cockroach6181) { + rc.LookupRangeDescriptor() +} + +func testRangeCacheCoalescedRequests_cockroach6181() { + // deadlocks: x > 0 + db := initTestDescriptorDB_cockroach6181() + pauseLookupResumeAndAssert := func() { + var wg sync.WaitGroup + for i := 0; i < 3; i++ { + wg.Add(1) + go func() { // G2,G3,... + // deadlocks: x > 0 + doLookupWithToken_cockroach6181(db.cache) + wg.Done() + }() + } + wg.Wait() + } + pauseLookupResumeAndAssert() +} + +/// G1 G2 G3 ... +/// testRangeCacheCoalescedRquests() +/// initTestDescriptorDB() +/// pauseLookupResumeAndAssert() +/// return +/// doLookupWithToken() +/// doLookupWithToken() +/// rc.LookupRangeDescriptor() +/// rc.LookupRangeDescriptor() +/// rdc.rangeCacheMu.RLock() +/// rdc.String() +/// rdc.rangeCacheMu.RLock() +/// fmt.Printf() +/// rdc.rangeCacheMu.RUnlock() +/// rdc.rangeCacheMu.Lock() +/// rdc.rangeCacheMu.RLock() +/// -------------------------------------G2,G3,... 
deadlock-------------------------------------- + +func Cockroach6181() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testRangeCacheCoalescedRequests_cockroach6181() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go new file mode 100644 index 00000000000000..9fd36f243b9ef9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go @@ -0,0 +1,196 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/7504 + * Buggy version: bc963b438cdc3e0ad058a5282358e5aee0595e17 + * fix commit-id: cab761b9f5ee5dee1448bc5d6b1d9f5a0ff0bad5 + * Flaky: 1/100 + * Description: There are locking leaseState, tableNameCache in Release(), but + * tableNameCache,LeaseState in AcquireByName. It is AB and BA deadlock. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach7504", Cockroach7504) +} + +func MakeCacheKey_cockroach7504(lease *LeaseState_cockroach7504) int { + return lease.id +} + +type LeaseState_cockroach7504 struct { + mu sync.Mutex // L1 + id int +} +type LeaseSet_cockroach7504 struct { + data []*LeaseState_cockroach7504 +} + +func (l *LeaseSet_cockroach7504) find(id int) *LeaseState_cockroach7504 { + return l.data[id] +} + +func (l *LeaseSet_cockroach7504) remove(s *LeaseState_cockroach7504) { + for i := 0; i < len(l.data); i++ { + if s == l.data[i] { + l.data = append(l.data[:i], l.data[i+1:]...) + break + } + } +} + +type tableState_cockroach7504 struct { + tableNameCache *tableNameCache_cockroach7504 + mu sync.Mutex // L3 + active *LeaseSet_cockroach7504 +} + +func (t *tableState_cockroach7504) release(lease *LeaseState_cockroach7504) { + t.mu.Lock() // L3 + defer t.mu.Unlock() // L3 + + s := t.active.find(MakeCacheKey_cockroach7504(lease)) + s.mu.Lock() // L1 + runtime.Gosched() + defer s.mu.Unlock() // L1 + + t.removeLease(s) +} +func (t *tableState_cockroach7504) removeLease(lease *LeaseState_cockroach7504) { + t.active.remove(lease) + t.tableNameCache.remove(lease) // L1 acquire/release +} + +type tableNameCache_cockroach7504 struct { + mu sync.Mutex // L2 + tables map[int]*LeaseState_cockroach7504 +} + +func (c *tableNameCache_cockroach7504) get(id int) { + c.mu.Lock() // L2 + defer c.mu.Unlock() // L2 + lease, ok := c.tables[id] + if !ok { + return + } + if lease == nil { + panic("nil lease in name cache") + } + lease.mu.Lock() // L1 + defer lease.mu.Unlock() // L1 +} + +func (c *tableNameCache_cockroach7504) remove(lease *LeaseState_cockroach7504) { + c.mu.Lock() // L2 + runtime.Gosched() + defer c.mu.Unlock() // L2 + key := MakeCacheKey_cockroach7504(lease) + existing, ok := c.tables[key] + if !ok { + return + } + if existing == lease { + delete(c.tables, key) + } +} + +type LeaseManager_cockroach7504 struct { + _ [64]byte + tableNames *tableNameCache_cockroach7504 + tables map[int]*tableState_cockroach7504 +} + +func (m *LeaseManager_cockroach7504) AcquireByName(id int) { + m.tableNames.get(id) +} + +func (m *LeaseManager_cockroach7504) findTableState(lease *LeaseState_cockroach7504) *tableState_cockroach7504 { + existing, ok := m.tables[lease.id] + if !ok { + return nil + } + return existing +} + +func (m *LeaseManager_cockroach7504) Release(lease *LeaseState_cockroach7504) { + t := m.findTableState(lease) + t.release(lease) +} +func NewLeaseManager_cockroach7504(tname *tableNameCache_cockroach7504, ts *tableState_cockroach7504) 
*LeaseManager_cockroach7504 { + mgr := &LeaseManager_cockroach7504{ + tableNames: tname, + tables: make(map[int]*tableState_cockroach7504), + } + mgr.tables[0] = ts + return mgr +} +func NewLeaseSet_cockroach7504(n int) *LeaseSet_cockroach7504 { + lset := &LeaseSet_cockroach7504{} + for i := 0; i < n; i++ { + lease := new(LeaseState_cockroach7504) + lset.data = append(lset.data, lease) + } + return lset +} + +func Cockroach7504() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + leaseNum := 2 + lset := NewLeaseSet_cockroach7504(leaseNum) + + nc := &tableNameCache_cockroach7504{ + tables: make(map[int]*LeaseState_cockroach7504), + } + for i := 0; i < leaseNum; i++ { + nc.tables[i] = lset.find(i) + } + + ts := &tableState_cockroach7504{ + tableNameCache: nc, + active: lset, + } + + mgr := NewLeaseManager_cockroach7504(nc, ts) + + // G1 + go func() { + // deadlocks: x > 0 + // lock L2-L1 + mgr.AcquireByName(0) + }() + + // G2 + go func() { + // deadlocks: x > 0 + // lock L1-L2 + mgr.Release(lset.find(0)) + }() + }() + } +} + +// Example deadlock trace: +// +// G1 G2 +// ------------------------------------------------------------------------------------------------ +// mgr.AcquireByName(0) mgr.Release(lset.find(0)) +// m.tableNames.get(id) . +// c.mu.Lock() [L2] . +// . t.release(lease) +// . t.mu.Lock() [L3] +// . s.mu.Lock() [L1] +// lease.mu.Lock() [L1] . +// . t.removeLease(s) +// . t.tableNameCache.remove(lease) +// . c.mu.Lock() [L2] +// ---------------------------------------G1, G2 leak---------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go new file mode 100644 index 00000000000000..e0d1a44a2b4ef9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go @@ -0,0 +1,58 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/9935 + * Buggy version: 4df302cc3f03328395dc3fefbfba58b7718e4f2f + * fix commit-id: ed6a100ba38dd51b0888b9a3d3ac6bdbb26c528c + * Flaky: 100/100 + * Description: This bug is caused by acquiring l.mu.Lock() twice. The fix is + * to release l.mu.Lock() before acquiring l.mu.Lock for the second time. 
+ */ +package main + +import ( + "errors" + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Cockroach9935", Cockroach9935) +} + +type loggingT_cockroach9935 struct { + mu sync.Mutex +} + +func (l *loggingT_cockroach9935) outputLogEntry() { + l.mu.Lock() + if err := l.createFile(); err != nil { + l.exit(err) + } + l.mu.Unlock() +} +func (l *loggingT_cockroach9935) createFile() error { + if rand.Intn(8)%4 > 0 { + return errors.New("") + } + return nil +} +func (l *loggingT_cockroach9935) exit(err error) { + l.mu.Lock() + defer l.mu.Unlock() +} +func Cockroach9935() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + l := &loggingT_cockroach9935{} + // deadlocks: x > 0 + go l.outputLogEntry() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go new file mode 100644 index 00000000000000..ed9165f751ed3c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -0,0 +1,65 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd10492", Etcd10492) +} + +type Checkpointer_etcd10492 func(ctx context.Context) + +type lessor_etcd10492 struct { + mu sync.RWMutex + cp Checkpointer_etcd10492 + checkpointInterval time.Duration +} + +func (le *lessor_etcd10492) Checkpoint() { + le.mu.Lock() + defer le.mu.Unlock() +} + +func (le *lessor_etcd10492) SetCheckpointer(cp Checkpointer_etcd10492) { + le.mu.Lock() + defer le.mu.Unlock() + + le.cp = cp +} + +func (le *lessor_etcd10492) Renew() { + le.mu.Lock() + unlock := func() { le.mu.Unlock() } + defer func() { unlock() }() + + if le.cp != nil { + le.cp(context.Background()) + } +} +func Etcd10492() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + + le := &lessor_etcd10492{ + checkpointInterval: 0, + } + fakerCheckerpointer_etcd10492 := func(ctx context.Context) { + le.Checkpoint() + } + le.SetCheckpointer(fakerCheckerpointer_etcd10492) + le.mu.Lock() + le.mu.Unlock() + le.Renew() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go new file mode 100644 index 00000000000000..3b8e8ed1cafcaa --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -0,0 +1,120 @@ +package main + +import ( + "context" + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd5509", Etcd5509) +} + +var ErrConnClosed_etcd5509 error + +type Client_etcd5509 struct { + mu sync.RWMutex + ctx context.Context + cancel context.CancelFunc +} + +func (c *Client_etcd5509) Close() { + c.mu.Lock() + defer c.mu.Unlock() + if c.cancel == nil { + return + } + c.cancel() + c.cancel = nil + c.mu.Unlock() + c.mu.Lock() +} + +type remoteClient_etcd5509 struct { + client *Client_etcd5509 + mu sync.Mutex +} + +func (r *remoteClient_etcd5509) acquire(ctx context.Context) error { + for { + r.client.mu.RLock() + closed := r.client.cancel == nil + r.mu.Lock() + r.mu.Unlock() + if closed { + return ErrConnClosed_etcd5509 // Missing RUnlock before return + } + r.client.mu.RUnlock() + } +} + +type kv_etcd5509 struct { + rc *remoteClient_etcd5509 +} + +func (kv *kv_etcd5509) Get(ctx context.Context) error { + return kv.Do(ctx) +} + +func (kv *kv_etcd5509) Do(ctx context.Context) error { + for { + err := kv.do(ctx) + if err == nil { + return nil + } + return 
err + } +} + +func (kv *kv_etcd5509) do(ctx context.Context) error { + err := kv.getRemote(ctx) + return err +} + +func (kv *kv_etcd5509) getRemote(ctx context.Context) error { + return kv.rc.acquire(ctx) +} + +type KV interface { + Get(ctx context.Context) error + Do(ctx context.Context) error +} + +func NewKV_etcd5509(c *Client_etcd5509) KV { + return &kv_etcd5509{rc: &remoteClient_etcd5509{ + client: c, + }} +} + +func Etcd5509() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10; i++ { + go func() { + // deadlocks: x > 0 + ctx, _ := context.WithCancel(context.TODO()) + cli := &Client_etcd5509{ + ctx: ctx, + } + kv := NewKV_etcd5509(cli) + donec := make(chan struct{}) + go func() { + defer close(donec) + err := kv.Get(context.TODO()) + if err != nil && err != ErrConnClosed_etcd5509 { + fmt.Println("Expect ErrConnClosed") + } + }() + + runtime.Gosched() + cli.Close() + + <-donec + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go new file mode 100644 index 00000000000000..40690395e5589d --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -0,0 +1,94 @@ +package main + +import ( + "context" + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd6708", Etcd6708) +} + +type EndpointSelectionMode_etcd6708 int + +const ( + EndpointSelectionRandom_etcd6708 EndpointSelectionMode_etcd6708 = iota + EndpointSelectionPrioritizeLeader_etcd6708 +) + +type MembersAPI_etcd6708 interface { + Leader(ctx context.Context) +} + +type Client_etcd6708 interface { + Sync(ctx context.Context) + SetEndpoints() + httpClient_etcd6708 +} + +type httpClient_etcd6708 interface { + Do(context.Context) +} + +type httpClusterClient_etcd6708 struct { + sync.RWMutex + selectionMode EndpointSelectionMode_etcd6708 +} + +func (c *httpClusterClient_etcd6708) getLeaderEndpoint() { + mAPI := NewMembersAPI_etcd6708(c) + mAPI.Leader(context.Background()) +} + +func (c *httpClusterClient_etcd6708) SetEndpoints() { + switch c.selectionMode { + case EndpointSelectionRandom_etcd6708: + case EndpointSelectionPrioritizeLeader_etcd6708: + c.getLeaderEndpoint() + } +} + +func (c *httpClusterClient_etcd6708) Do(ctx context.Context) { + c.RLock() + c.RUnlock() +} + +func (c *httpClusterClient_etcd6708) Sync(ctx context.Context) { + c.Lock() + defer c.Unlock() + + c.SetEndpoints() +} + +type httpMembersAPI_etcd6708 struct { + client httpClient_etcd6708 +} + +func (m *httpMembersAPI_etcd6708) Leader(ctx context.Context) { + m.client.Do(ctx) +} + +func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { + return &httpMembersAPI_etcd6708{ + client: c, + } +} + +func Etcd6708() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + hc := &httpClusterClient_etcd6708{ + selectionMode: EndpointSelectionPrioritizeLeader_etcd6708, + } + hc.Sync(context.Background()) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6857.go b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go new file mode 100644 index 00000000000000..7411a3216143d0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go @@ -0,0 +1,91 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/6857 + * Buggy version: 7c8f13aed7fe251e7066ed6fc1a090699c2cae0e + * fix commit-id: 7afc490c95789c408fbc256d8e790273d331c984 + * Flaky: 19/100 + */ +package main + +import 
( + "runtime" + "time" +) + +func init() { + register("Etcd6857", Etcd6857) +} + +type Status_etcd6857 struct{} + +type node_etcd6857 struct { + status chan chan Status_etcd6857 + stop chan struct{} + done chan struct{} +} + +func (n *node_etcd6857) Status() Status_etcd6857 { + c := make(chan Status_etcd6857) + n.status <- c + return <-c +} + +func (n *node_etcd6857) run() { + for { + select { + case c := <-n.status: + c <- Status_etcd6857{} + case <-n.stop: + close(n.done) + return + } + } +} + +func (n *node_etcd6857) Stop() { + select { + case n.stop <- struct{}{}: + case <-n.done: + return + } + <-n.done +} + +func NewNode_etcd6857() *node_etcd6857 { + return &node_etcd6857{ + status: make(chan chan Status_etcd6857), + stop: make(chan struct{}), + done: make(chan struct{}), + } +} + +/// +/// G1 G2 G3 +/// n.run() +/// n.Stop() +/// n.stop<- +/// <-n.stop +/// <-n.done +/// close(n.done) +/// return +/// return +/// n.Status() +/// n.status<- +///----------------G2 leak------------------- +/// + +func Etcd6857() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i <= 100; i++ { + go func() { + n := NewNode_etcd6857() + go n.run() // G1 + // deadlocks: x > 0 + go n.Status() // G2 + go n.Stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6873.go b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go new file mode 100644 index 00000000000000..848c56d801492a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go @@ -0,0 +1,92 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/commit/7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Buggy version: 377f19b0031f9c0aafe2aec28b6f9019311f52f9 + * fix commit-id: 7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Flaky: 9/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd6873", Etcd6873) +} + +type watchBroadcast_etcd6873 struct{} + +type watchBroadcasts_etcd6873 struct { + mu sync.Mutex + updatec chan *watchBroadcast_etcd6873 + donec chan struct{} +} + +func newWatchBroadcasts_etcd6873() *watchBroadcasts_etcd6873 { + wbs := &watchBroadcasts_etcd6873{ + updatec: make(chan *watchBroadcast_etcd6873, 1), + donec: make(chan struct{}), + } + go func() { // G2 + defer close(wbs.donec) + // deadlocks: x > 0 + for wb := range wbs.updatec { + wbs.coalesce(wb) + } + }() + return wbs +} + +func (wbs *watchBroadcasts_etcd6873) coalesce(wb *watchBroadcast_etcd6873) { + wbs.mu.Lock() + wbs.mu.Unlock() +} + +func (wbs *watchBroadcasts_etcd6873) stop() { + wbs.mu.Lock() + defer wbs.mu.Unlock() + close(wbs.updatec) + <-wbs.donec +} + +func (wbs *watchBroadcasts_etcd6873) update(wb *watchBroadcast_etcd6873) { + select { + case wbs.updatec <- wb: + default: + } +} + +/// +/// G1 G2 G3 +/// newWatchBroadcasts() +/// wbs.update() +/// wbs.updatec <- +/// return +/// <-wbs.updatec +/// wbs.coalesce() +/// wbs.stop() +/// wbs.mu.Lock() +/// close(wbs.updatec) +/// <-wbs.donec +/// wbs.mu.Lock() +///---------------------G2,G3 deadlock------------------------- +/// + +func Etcd6873() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + wbs := newWatchBroadcasts_etcd6873() // G1 + wbs.update(&watchBroadcast_etcd6873{}) + // deadlocks: x > 0 + go wbs.stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7492.go b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go new file mode 100644 index 00000000000000..feb497f9b4512a 
--- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go @@ -0,0 +1,178 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/7492 + * Buggy version: 51939650057d602bb5ab090633138fffe36854dc + * fix commit-id: 1b1fabef8ffec606909f01c3983300fff539f214 + * Flaky: 40/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd7492", Etcd7492) +} + +type TokenProvider_etcd7492 interface { + assign() + enable() + disable() +} + +type simpleTokenTTLKeeper_etcd7492 struct { + tokens map[string]time.Time + addSimpleTokenCh chan struct{} + stopCh chan chan struct{} + deleteTokenFunc func(string) +} + +type authStore_etcd7492 struct { + tokenProvider TokenProvider_etcd7492 +} + +func (as *authStore_etcd7492) Authenticate() { + as.tokenProvider.assign() +} + +func NewSimpleTokenTTLKeeper_etcd7492(deletefunc func(string)) *simpleTokenTTLKeeper_etcd7492 { + stk := &simpleTokenTTLKeeper_etcd7492{ + tokens: make(map[string]time.Time), + addSimpleTokenCh: make(chan struct{}, 1), + stopCh: make(chan chan struct{}), + deleteTokenFunc: deletefunc, + } + // deadlocks: x > 0 + go stk.run() // G1 + return stk +} + +func (tm *simpleTokenTTLKeeper_etcd7492) run() { + tokenTicker := time.NewTicker(time.Nanosecond) + defer tokenTicker.Stop() + for { + select { + case <-tm.addSimpleTokenCh: + runtime.Gosched() + /// Make tm.tokens not empty is enough + tm.tokens["1"] = time.Now() + case <-tokenTicker.C: + runtime.Gosched() + for t, _ := range tm.tokens { + tm.deleteTokenFunc(t) + delete(tm.tokens, t) + } + case waitCh := <-tm.stopCh: + waitCh <- struct{}{} + return + } + } +} + +func (tm *simpleTokenTTLKeeper_etcd7492) addSimpleToken() { + tm.addSimpleTokenCh <- struct{}{} + runtime.Gosched() +} + +func (tm *simpleTokenTTLKeeper_etcd7492) stop() { + waitCh := make(chan struct{}) + tm.stopCh <- waitCh + <-waitCh + close(tm.stopCh) +} + +type tokenSimple_etcd7492 struct { + simpleTokenKeeper *simpleTokenTTLKeeper_etcd7492 + simpleTokensMu sync.RWMutex +} + +func (t *tokenSimple_etcd7492) assign() { + t.assignSimpleTokenToUser() +} + +func (t *tokenSimple_etcd7492) assignSimpleTokenToUser() { + t.simpleTokensMu.Lock() + runtime.Gosched() + t.simpleTokenKeeper.addSimpleToken() + t.simpleTokensMu.Unlock() +} +func newDeleterFunc(t *tokenSimple_etcd7492) func(string) { + return func(tk string) { + t.simpleTokensMu.Lock() + defer t.simpleTokensMu.Unlock() + } +} + +func (t *tokenSimple_etcd7492) enable() { + t.simpleTokenKeeper = NewSimpleTokenTTLKeeper_etcd7492(newDeleterFunc(t)) +} + +func (t *tokenSimple_etcd7492) disable() { + if t.simpleTokenKeeper != nil { + t.simpleTokenKeeper.stop() + t.simpleTokenKeeper = nil + } + t.simpleTokensMu.Lock() + t.simpleTokensMu.Unlock() +} + +func newTokenProviderSimple_etcd7492() *tokenSimple_etcd7492 { + return &tokenSimple_etcd7492{} +} + +func setupAuthStore_etcd7492() (store *authStore_etcd7492, teardownfunc func()) { + as := &authStore_etcd7492{ + tokenProvider: newTokenProviderSimple_etcd7492(), + } + as.tokenProvider.enable() + tearDown := func() { + as.tokenProvider.disable() + } + return as, tearDown +} + +/// +/// G2 G1 +/// stk.run() +/// ts.assignSimpleTokenToUser() +/// t.simpleTokensMu.Lock() +/// t.simpleTokenKeeper.addSimpleToken() +/// tm.addSimpleTokenCh <- true +/// <-tm.addSimpleTokenCh +/// t.simpleTokensMu.Unlock() +/// ts.assignSimpleTokenToUser() +/// ... ... 
+/// t.simpleTokensMu.Lock() +/// <-tokenTicker.C +/// tm.addSimpleTokenCh <- true +/// tm.deleteTokenFunc() +/// t.simpleTokensMu.Lock() +///------------------------------------G1,G2 deadlock--------------------------------------------- +/// + +func Etcd7492() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + as, tearDown := setupAuthStore_etcd7492() + defer tearDown() + var wg sync.WaitGroup + wg.Add(3) + for i := 0; i < 3; i++ { + go func() { // G2 + // deadlocks: x > 0 + as.Authenticate() + defer wg.Done() + }() + } + wg.Wait() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7902.go b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go new file mode 100644 index 00000000000000..72f713ed44cc7b --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go @@ -0,0 +1,113 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/coreos/etcd/pull/7902 + * Buggy version: dfdaf082c51ba14861267f632f6af795a27eb4ef + * fix commit-id: 87d99fe0387ee1df1cf1811d88d37331939ef4ae + * Flaky: 100/100 + * Description: + * At least two goroutines are needed to trigger this bug, + * one is leader and the other is follower. Both the leader + * and the follower execute the code above. If the follower + * acquires mu.Lock() firstly and enter rc.release(), it will + * be blocked at <- rcNextc (nextc). Only the leader can execute + * close(nextc) to unblock the follower inside rc.release(). + * However, in order to invoke rc.release(), the leader needs + * to acquires mu.Lock(). + * The fix is to remove the lock and unlock around rc.release(). + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Etcd7902", Etcd7902) +} + +type roundClient_etcd7902 struct { + progress int + acquire func() + validate func() + release func() +} + +func runElectionFunc_etcd7902() { + // deadlocks: x > 0 + rcs := make([]roundClient_etcd7902, 3) + nextc := make(chan bool) + for i := range rcs { + var rcNextc chan bool + setRcNextc := func() { + rcNextc = nextc + } + rcs[i].acquire = func() {} + rcs[i].validate = func() { + setRcNextc() + } + rcs[i].release = func() { + if i == 0 { // Assume the first roundClient is the leader + close(nextc) + nextc = make(chan bool) + } + <-rcNextc // Follower is blocking here + } + } + doRounds_etcd7902(rcs, 100) +} + +func doRounds_etcd7902(rcs []roundClient_etcd7902, rounds int) { + var mu sync.Mutex + var wg sync.WaitGroup + wg.Add(len(rcs)) + for i := range rcs { + go func(rc *roundClient_etcd7902) { // G2,G3 + // deadlocks: x > 0 + defer wg.Done() + for rc.progress < rounds || rounds <= 0 { + rc.acquire() + mu.Lock() + rc.validate() + mu.Unlock() + time.Sleep(10 * time.Millisecond) + rc.progress++ + mu.Lock() + rc.release() + mu.Unlock() + } + }(&rcs[i]) + } + wg.Wait() +} + +/// +/// G1 G2 (leader) G3 (follower) +/// runElectionFunc() +/// doRounds() +/// wg.Wait() +/// ... +/// mu.Lock() +/// rc.validate() +/// rcNextc = nextc +/// mu.Unlock() ... 
+/// mu.Lock() +/// rc.validate() +/// mu.Unlock() +/// mu.Lock() +/// rc.release() +/// <-rcNextc +/// mu.Lock() +/// -------------------------G1,G2,G3 deadlock-------------------------- +/// + +func Etcd7902() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go runElectionFunc_etcd7902() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go new file mode 100644 index 00000000000000..e7daebcb14f74c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/goroutineleakgc.go @@ -0,0 +1,189 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +// This is a set of micro-tests with obvious goroutine leaks that +// ensures goroutine leak detection works. + +func init() { + register("NilRecv", NilRecv) + register("NilSend", NilSend) + register("SelectNoCases", SelectNoCases) + register("ChanRecv", ChanRecv) + register("ChanSend", ChanSend) + register("Select", Select) + register("WaitGroup", WaitGroup) + register("MutexStack", MutexStack) + register("MutexHeap", MutexHeap) + register("RWMutexRLock", RWMutexRLock) + register("RWMutexLock", RWMutexLock) + register("Cond", Cond) + register("Mixed", Mixed) + register("NoLeakGlobal", NoLeakGlobal) +} + +func NilRecv() { + go func() { + var c chan int + <-c + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func NilSend() { + go func() { + var c chan int + c <- 0 + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func ChanRecv() { + go func() { + <-make(chan int) + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func SelectNoCases() { + go func() { + select {} + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func ChanSend() { + go func() { + make(chan int) <- 0 + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func Select() { + go func() { + select { + case make(chan int) <- 0: + case <-make(chan int): + } + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func WaitGroup() { + go func() { + var wg sync.WaitGroup + wg.Add(1) + wg.Wait() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func MutexStack() { + for i := 0; i < 1000; i++ { + go func() { + var mu sync.Mutex + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + } + time.Sleep(10 * time.Millisecond) + runtime.GC() + time.Sleep(10 * time.Millisecond) +} + +func MutexHeap() { + for i := 0; i < 1000; i++ { + go func() { + mu := &sync.Mutex{} + go func() { + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + }() + } + time.Sleep(10 * time.Millisecond) + runtime.GC() + time.Sleep(10 * time.Millisecond) +} + +func RWMutexRLock() { + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.RLock() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func RWMutexLock() { + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func Cond() { + go func() { + cond := sync.NewCond(&sync.Mutex{}) + cond.L.Lock() + cond.Wait() + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +func Mixed() { + go func() { + ch := make(chan 
int) + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + ch <- 0 + wg.Done() + panic("should not be reached") + }() + wg.Wait() + <-ch + panic("should not be reached") + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} + +var ch = make(chan int) + +// No leak should be reported by this test +func NoLeakGlobal() { + go func() { + <-ch + }() + time.Sleep(10 * time.Millisecond) + runtime.GC() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1275.go b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go new file mode 100644 index 00000000000000..f9a876cb9d0e61 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go @@ -0,0 +1,116 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1275 + * Buggy version: (missing) + * fix commit-id: 0669f3f89e0330e94bb13fa1ce8cc704aab50c9c + * Flaky: 100/100 + * Description: + * Two goroutines are invovled in this deadlock. The first goroutine + * is the main goroutine. It is blocked at case <- donec, and it is + * waiting for the second goroutine to close the channel. + * The second goroutine is created by the main goroutine. It is blocked + * when calling stream.Read(). stream.Read() invokes recvBufferRead.Read(). + * The second goroutine is blocked at case i := r.recv.get(), and it is + * waiting for someone to send a message to this channel. + * It is the client.CloseSream() method called by the main goroutine that + * should send the message, but it is not. The patch is to send out this message. + */ +package main + +import ( + "io" + "runtime" + "time" +) + +func init() { + register("Grpc1275", Grpc1275) +} + +type recvBuffer_grpc1275 struct { + c chan bool +} + +func (b *recvBuffer_grpc1275) get() <-chan bool { + return b.c +} + +type recvBufferReader_grpc1275 struct { + recv *recvBuffer_grpc1275 +} + +func (r *recvBufferReader_grpc1275) Read(p []byte) (int, error) { + select { + case <-r.recv.get(): + } + return 0, nil +} + +type Stream_grpc1275 struct { + trReader io.Reader +} + +func (s *Stream_grpc1275) Read(p []byte) (int, error) { + return io.ReadFull(s.trReader, p) +} + +type http2Client_grpc1275 struct{} + +func (t *http2Client_grpc1275) CloseStream(s *Stream_grpc1275) { + // It is the client.CloseSream() method called by the + // main goroutine that should send the message, but it + // is not. The patch is to send out this message. 
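+	// In this buggy reproduction CloseStream is deliberately left as a no-op, so
+	// the reader goroutine (G2) stays blocked in recvBufferReader_grpc1275.Read
+	// on <-r.recv.get() and is reported as a goroutine leak.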
+} + +func (t *http2Client_grpc1275) NewStream() *Stream_grpc1275 { + return &Stream_grpc1275{ + trReader: &recvBufferReader_grpc1275{ + recv: &recvBuffer_grpc1275{ + c: make(chan bool), + }, + }, + } +} + +func testInflightStreamClosing_grpc1275() { + client := &http2Client_grpc1275{} + stream := client.NewStream() + donec := make(chan bool) + go func() { // G2 + defer close(donec) + // deadlocks: 1 + stream.Read([]byte{1}) + }() + + client.CloseStream(stream) + + timeout := time.NewTimer(300 * time.Nanosecond) + select { + case <-donec: + if !timeout.Stop() { + <-timeout.C + } + case <-timeout.C: + } +} + +/// +/// G1 G2 +/// testInflightStreamClosing() +/// stream.Read() +/// io.ReadFull() +/// <- r.recv.get() +/// CloseStream() +/// <- donec +/// ------------G1 timeout, G2 leak--------------------- +/// + +func Grpc1275() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + testInflightStreamClosing_grpc1275() // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1424.go b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go new file mode 100644 index 00000000000000..262335b1d1a1d7 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go @@ -0,0 +1,112 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1424 + * Buggy version: 39c8c3866d926d95e11c03508bf83d00f2963f91 + * fix commit-id: 64bd0b04a7bb1982078bae6a2ab34c226125fbc1 + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc1424", Grpc1424) +} + +type Balancer_grpc1424 interface { + Notify() <-chan bool +} + +type roundRobin_grpc1424 struct { + mu sync.Mutex + addrCh chan bool +} + +func (rr *roundRobin_grpc1424) Notify() <-chan bool { + return rr.addrCh +} + +type addrConn_grpc1424 struct { + mu sync.Mutex +} + +func (ac *addrConn_grpc1424) tearDown() { + ac.mu.Lock() + defer ac.mu.Unlock() +} + +type dialOption_grpc1424 struct { + balancer Balancer_grpc1424 +} + +type ClientConn_grpc1424 struct { + dopts dialOption_grpc1424 + conns []*addrConn_grpc1424 +} + +func (cc *ClientConn_grpc1424) lbWatcher(doneChan chan bool) { + for addr := range cc.dopts.balancer.Notify() { + if addr { + // nop, make compiler happy + } + var ( + del []*addrConn_grpc1424 + ) + for _, a := range cc.conns { + del = append(del, a) + } + for _, c := range del { + c.tearDown() + } + } +} + +func NewClientConn_grpc1424() *ClientConn_grpc1424 { + cc := &ClientConn_grpc1424{ + dopts: dialOption_grpc1424{ + &roundRobin_grpc1424{addrCh: make(chan bool)}, + }, + } + return cc +} + +func DialContext_grpc1424() { + cc := NewClientConn_grpc1424() + waitC := make(chan error, 1) + go func() { // G2 + defer close(waitC) + // deadlocks: 1 + ch := cc.dopts.balancer.Notify() + if ch != nil { + doneChan := make(chan bool) + go cc.lbWatcher(doneChan) // G3 + <-doneChan + } + }() + /// close addrCh + close(cc.dopts.balancer.(*roundRobin_grpc1424).addrCh) +} + +/// +/// G1 G2 G3 +/// DialContext() +/// cc.dopts.balancer.Notify() +/// cc.lbWatcher() +/// <-doneChan +/// close() +/// -----------------------G2 leak------------------------------------ +/// + +func Grpc1424() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go DialContext_grpc1424() // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1460.go b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go new file mode 100644 
index 00000000000000..44e761c7f37e26 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go @@ -0,0 +1,86 @@ +/* + * Project: grpc + * Issue or PR : https://github.com/grpc/grpc-go/pull/1460 + * Buggy version: 7db1564ba1229bc42919bb1f6d9c4186f3aa8678 + * fix commit-id: e605a1ecf24b634f94f4eefdab10a9ada98b70dd + * Flaky: 100/100 + * Description: + * When gRPC keepalives are enabled (which isn't the case + * by default at this time) and PermitWithoutStream is false + * (the default), the client can deadlock when transitioning + * between having no active stream and having one active + * stream.The keepalive() goroutine is stuck at “<-t.awakenKeepalive”, + * while the main goroutine is stuck in NewStream() on t.mu.Lock(). + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc1460", Grpc1460) +} + +type Stream_grpc1460 struct{} + +type http2Client_grpc1460 struct { + mu sync.Mutex + awakenKeepalive chan struct{} + activeStream []*Stream_grpc1460 +} + +func (t *http2Client_grpc1460) keepalive() { + t.mu.Lock() + if len(t.activeStream) < 1 { + <-t.awakenKeepalive + runtime.Gosched() + t.mu.Unlock() + } else { + t.mu.Unlock() + } +} + +func (t *http2Client_grpc1460) NewStream() { + t.mu.Lock() + runtime.Gosched() + t.activeStream = append(t.activeStream, &Stream_grpc1460{}) + if len(t.activeStream) == 1 { + select { + case t.awakenKeepalive <- struct{}{}: + default: + } + } + t.mu.Unlock() +} + +/// +/// G1 G2 +/// client.keepalive() +/// client.NewStream() +/// t.mu.Lock() +/// <-t.awakenKeepalive +/// t.mu.Lock() +/// ---------------G1, G2 deadlock-------------- +/// + +func Grpc1460() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + client := &http2Client_grpc1460{ + awakenKeepalive: make(chan struct{}), + } + // deadlocks: x > 0 + go client.keepalive() //G1 + // deadlocks: x > 0 + go client.NewStream() //G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc3017.go b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go new file mode 100644 index 00000000000000..47cea8068cd781 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go @@ -0,0 +1,146 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +// This test case is a reproduction of grpc/3017. +// +// It is a goroutine leak that also simultaneously engages many GC assists. +// Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. 
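+//
+// The leak reproduced below: the time.AfterFunc callback (G3) acquires ccc.mu
+// and, when entry.abortDeleting is true, returns without unlocking it (see the
+// "Missing unlock" note in RemoveSubConn), so every later Lock on ccc.mu blocks
+// forever and G1/G2 are reported as leaked.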
+ +func init() { + register("Grpc3017", Grpc3017) +} + +type Address_grpc3017 int +type SubConn_grpc3017 int + +type subConnCacheEntry_grpc3017 struct { + sc SubConn_grpc3017 + cancel func() + abortDeleting bool +} + +type lbCacheClientConn_grpc3017 struct { + mu sync.Mutex // L1 + timeout time.Duration + subConnCache map[Address_grpc3017]*subConnCacheEntry_grpc3017 + subConnToAddr map[SubConn_grpc3017]Address_grpc3017 +} + +func (ccc *lbCacheClientConn_grpc3017) NewSubConn(addrs []Address_grpc3017) SubConn_grpc3017 { + if len(addrs) != 1 { + return SubConn_grpc3017(1) + } + addrWithoutMD := addrs[0] + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + if entry, ok := ccc.subConnCache[addrWithoutMD]; ok { + entry.cancel() + delete(ccc.subConnCache, addrWithoutMD) + return entry.sc + } + scNew := SubConn_grpc3017(1) + ccc.subConnToAddr[scNew] = addrWithoutMD + return scNew +} + +func (ccc *lbCacheClientConn_grpc3017) RemoveSubConn(sc SubConn_grpc3017) { + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + addr, ok := ccc.subConnToAddr[sc] + if !ok { + return + } + + if entry, ok := ccc.subConnCache[addr]; ok { + if entry.sc != sc { + delete(ccc.subConnToAddr, sc) + } + return + } + + entry := &subConnCacheEntry_grpc3017{ + sc: sc, + } + ccc.subConnCache[addr] = entry + + timer := time.AfterFunc(ccc.timeout, func() { // G3 + runtime.Gosched() + ccc.mu.Lock() // L1 + // deadlocks: x > 0 + if entry.abortDeleting { + return // Missing unlock + } + delete(ccc.subConnToAddr, sc) + delete(ccc.subConnCache, addr) + ccc.mu.Unlock() + }) + + entry.cancel = func() { + if !timer.Stop() { + entry.abortDeleting = true + } + } +} + +func Grpc3017() { + defer func() { + time.Sleep(100 * time.Millisecond) + }() + + for i := 0; i < 100; i++ { + go func() { //G1 + done := make(chan struct{}) + + // deadlocks: x > 0 + ccc := &lbCacheClientConn_grpc3017{ + timeout: time.Nanosecond, + subConnCache: make(map[Address_grpc3017]*subConnCacheEntry_grpc3017), + subConnToAddr: make(map[SubConn_grpc3017]Address_grpc3017), + } + + sc := ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + go func() { // G2 + // deadlocks: x > 0 + for i := 0; i < 10000; i++ { + ccc.RemoveSubConn(sc) + sc = ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + } + close(done) + }() + <-done + }() + } +} + +// Example of a deadlocking trace +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// NewSubConn([1]) +// ccc.mu.Lock() [L1] +// sc = 1 +// ccc.subConnToAddr[1] = 1 +// go func() [G2] +// <-done . +// . ccc.RemoveSubConn(1) +// . ccc.mu.Lock() +// . addr = 1 +// . entry = &subConnCacheEntry_grpc3017{sc: 1} +// . cc.subConnCache[1] = entry +// . timer = time.AfterFunc() [G3] +// . entry.cancel = func() . +// . sc = ccc.NewSubConn([1]) . +// . ccc.mu.Lock() [L1] . +// . entry.cancel() . +// . !timer.Stop() [true] . +// . entry.abortDeleting = true . +// . . ccc.mu.Lock() +// . . <<>> +// . ccc.RemoveSubConn(1) +// . 
ccc.mu.Lock() [L1] +// -------------------------------------------G1, G2 leak----------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go new file mode 100644 index 00000000000000..08a1ee502ca47e --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -0,0 +1,69 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/660 + * Buggy version: db85417dd0de6cc6f583672c6175a7237e5b5dd2 + * fix commit-id: ceacfbcbc1514e4e677932fd55938ac455d182fb + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. + */ +package main + +import ( + "math/rand" + "runtime" + "time" +) + +func init() { + register("Grpc660", Grpc660) +} + +type benchmarkClient_grpc660 struct { + stop chan bool +} + +func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { + for { + done := make(chan bool) + go func() { // G2 + // deadlocks: 1 + if rand.Intn(10) > 7 { + done <- false + return + } + done <- true + }() + select { + case <-bc.stop: + return + case <-done: + } + } +} + +/// +/// G1 G2 helper goroutine +/// doCloseLoopUnary() +/// bc.stop <- true +/// <-bc.stop +/// return +/// done <- +/// ----------------------G2 leak-------------------------- +/// + +func Grpc660() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + go func() { + bc := &benchmarkClient_grpc660{ + stop: make(chan bool), + } + go bc.doCloseLoopUnary() // G1 + go func() { // helper goroutine + bc.stop <- true + }() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc795.go b/src/runtime/testdata/testgoroutineleakgc/grpc795.go new file mode 100644 index 00000000000000..c97f9c96c67d71 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc795.go @@ -0,0 +1,70 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Grpc795", Grpc795) +} + +type Server_grpc795 struct { + mu sync.Mutex + drain bool +} + +func (s *Server_grpc795) GracefulStop() { + s.mu.Lock() + if s.drain { + s.mu.Lock() + return + } + s.drain = true + s.mu.Unlock() +} +func (s *Server_grpc795) Serve() { + s.mu.Lock() + s.mu.Unlock() +} + +func NewServer_grpc795() *Server_grpc795 { + return &Server_grpc795{} +} + +type test_grpc795 struct { + srv *Server_grpc795 +} + +func (te *test_grpc795) startServer() { + s := NewServer_grpc795() + te.srv = s + // deadlocks: x > 0 + go s.Serve() +} + +func newTest_grpc795() *test_grpc795 { + return &test_grpc795{} +} + +func testServerGracefulStopIdempotent_grpc795() { + // deadlocks: x > 0 + te := newTest_grpc795() + + te.startServer() + + for i := 0; i < 3; i++ { + te.srv.GracefulStop() + } +} + +func Grpc795() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testServerGracefulStopIdempotent_grpc795() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go new file mode 100644 index 00000000000000..15f787cf6e53d9 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -0,0 +1,109 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/862 + * Buggy version: d8f4ebe77f6b7b6403d7f98626de8a534f9b93a7 + * fix commit-id: dd5645bebff44f6b88780bb949022a09eadd7dae + * Flaky: 100/100 + * Description: + * When return value conn is nil, cc (ClientConn) is not closed. 
+ * The goroutine executing resetAddrConn is leaked. The patch is to + * close ClientConn in the defer func(). + */ +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Grpc862", Grpc862) +} + +type ClientConn_grpc862 struct { + ctx context.Context + cancel context.CancelFunc + conns []*addrConn_grpc862 +} + +func (cc *ClientConn_grpc862) Close() { + cc.cancel() + conns := cc.conns + cc.conns = nil + for _, ac := range conns { + ac.tearDown() + } +} + +func (cc *ClientConn_grpc862) resetAddrConn() { + ac := &addrConn_grpc862{ + cc: cc, + } + cc.conns = append(cc.conns, ac) + ac.ctx, ac.cancel = context.WithCancel(cc.ctx) + ac.resetTransport() +} + +type addrConn_grpc862 struct { + cc *ClientConn_grpc862 + ctx context.Context + cancel context.CancelFunc +} + +func (ac *addrConn_grpc862) resetTransport() { + for retries := 1; ; retries++ { + _ = 2 * time.Nanosecond * time.Duration(retries) + timeout := 10 * time.Nanosecond + _, cancel := context.WithTimeout(ac.ctx, timeout) + _ = time.Now() + cancel() + <-ac.ctx.Done() + return + } +} + +func (ac *addrConn_grpc862) tearDown() { + ac.cancel() +} + +func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { + cc := &ClientConn_grpc862{} + cc.ctx, cc.cancel = context.WithCancel(context.Background()) + defer func() { + select { + case <-ctx.Done(): + if conn != nil { + conn.Close() + } + conn = nil + default: + } + }() + go func() { // G2 + // deadlocks: 1 + cc.resetAddrConn() + }() + return conn +} + +/// +/// G1 G2 +/// DialContext() +/// cc.resetAddrConn() +/// resetTransport() +/// <-ac.ctx.Done() +/// --------------G2 leak------------------ +/// + +func Grpc862() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + ctx, cancel := context.WithCancel(context.Background()) + go DialContext_grpc862(ctx) // G1 + go cancel() // helper goroutine + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go new file mode 100644 index 00000000000000..1c1f598c4578cc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "runtime" + "sync" + "time" +) + +func init() { + register("Hugo3251", Hugo3251) +} + +type remoteLock_hugo3251 struct { + sync.RWMutex // L1 + m map[string]*sync.Mutex // L2 +} + +func (l *remoteLock_hugo3251) URLLock(url string) { + l.Lock() // L1 + if _, ok := l.m[url]; !ok { + l.m[url] = &sync.Mutex{} + } + l.m[url].Lock() // L2 + runtime.Gosched() + l.Unlock() // L1 + // runtime.Gosched() +} + +func (l *remoteLock_hugo3251) URLUnlock(url string) { + l.RLock() // L1 + defer l.RUnlock() // L1 + if um, ok := l.m[url]; ok { + um.Unlock() // L2 + } +} + +func resGetRemote_hugo3251(remoteURLLock *remoteLock_hugo3251, url string) error { + remoteURLLock.URLLock(url) + defer func() { remoteURLLock.URLUnlock(url) }() + + return nil +} + +func Hugo3251() { + defer func() { + time.Sleep(time.Second) + runtime.GC() + }() + + for i := 0; i < 10; i++ { + go func() { // G1 + // deadlocks: x > 0 + url := "http://Foo.Bar/foo_Bar-Foo" + remoteURLLock := &remoteLock_hugo3251{m: make(map[string]*sync.Mutex)} + for range []bool{false, true} { + var wg sync.WaitGroup + for i := 0; i < 100; i++ { + wg.Add(1) + go func(gor int) { // G2 + // deadlocks: x > 0 + defer wg.Done() + for j := 0; j < 200; j++ { + err := resGetRemote_hugo3251(remoteURLLock, url) + if err != nil { + fmt.Errorf("Error getting resource content: %s", 
err) + } + time.Sleep(300 * time.Nanosecond) + } + }(i) + } + wg.Wait() + } + }() + } +} + +// Example of deadlocking trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// wg.Add(1) [W1: 1] +// go func() [G2] +// go func() [G3] +// . resGetRemote() +// . remoteURLLock.URLLock(url) +// . l.Lock() [L1] +// . l.m[url] = &sync.Mutex{} [L2] +// . l.m[url].Lock() [L2] +// . l.Unlock() [L1] +// . . resGetRemote() +// . . remoteURLLock.URLLock(url) +// . . l.Lock() [L1] +// . . l.m[url].Lock() [L2] +// . remoteURLLock.URLUnlock(url) +// . l.RLock() [L1] +// ... +// wg.Wait() [W1] +// ----------------------------------------G1,G2,G3 leak------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo5379.go b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go new file mode 100644 index 00000000000000..cee52801319d04 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go @@ -0,0 +1,315 @@ +package main + +import ( + "context" + "runtime" + + "log" + "sync" + "time" +) + +func init() { + register("Hugo5379", Hugo5379) +} + +type shortcodeHandler_hugo5379 struct { + p *PageWithoutContent_hugo5379 + contentShortcodes map[int]func() error + contentShortcodesDelta map[int]func() error + init sync.Once // O1 +} + +func (s *shortcodeHandler_hugo5379) executeShortcodesForDelta(p *PageWithoutContent_hugo5379) error { + for k, _ := range s.contentShortcodesDelta { + render := s.contentShortcodesDelta[k] + if err := render(); err != nil { + continue + } + } + return nil +} + +func (s *shortcodeHandler_hugo5379) updateDelta() { + s.init.Do(func() { + s.contentShortcodes = createShortcodeRenderers_hugo5379(s.p.withoutContent()) + }) + + delta := make(map[int]func() error) + + for k, v := range s.contentShortcodes { + if _, ok := delta[k]; !ok { + delta[k] = v + } + } + + s.contentShortcodesDelta = delta +} + +type Page_hugo5379 struct { + *pageInit_hugo5379 + *pageContentInit_hugo5379 + pageWithoutContent *PageWithoutContent_hugo5379 + contentInit sync.Once // O2 + contentInitMu sync.Mutex // L1 + shortcodeState *shortcodeHandler_hugo5379 +} + +func (p *Page_hugo5379) WordCount() { + p.initContentPlainAndMeta() +} + +func (p *Page_hugo5379) initContentPlainAndMeta() { + p.initContent() + p.initPlain(true) +} + +func (p *Page_hugo5379) initPlain(lock bool) { + p.plainInit.Do(func() { + if lock { + /// Double locking here. 
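+			// In the leaking interleaving, the goroutine spawned in initContent (G2)
+			// already holds contentInitMu and is itself waiting for this plainInit
+			// Once to finish, so taking the mutex here blocks both goroutines forever.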
+ p.contentInitMu.Lock() + defer p.contentInitMu.Unlock() + } + }) +} + +func (p *Page_hugo5379) withoutContent() *PageWithoutContent_hugo5379 { + p.pageInit_hugo5379.withoutContentInit.Do(func() { + p.pageWithoutContent = &PageWithoutContent_hugo5379{Page_hugo5379: p} + }) + return p.pageWithoutContent +} + +func (p *Page_hugo5379) prepareForRender() error { + var err error + if err = handleShortcodes_hugo5379(p.withoutContent()); err != nil { + return err + } + return nil +} + +func (p *Page_hugo5379) setContentInit() { + p.shortcodeState.updateDelta() +} + +func (p *Page_hugo5379) initContent() { + p.contentInit.Do(func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + c := make(chan error, 1) + + go func() { // G2 + // deadlocks: x > 0 + var err error + p.contentInitMu.Lock() // first lock here + defer p.contentInitMu.Unlock() + + err = p.prepareForRender() + if err != nil { + c <- err + return + } + c <- err + }() + + select { + case <-ctx.Done(): + case <-c: + } + }) +} + +type PageWithoutContent_hugo5379 struct { + *Page_hugo5379 +} + +type pageInit_hugo5379 struct { + withoutContentInit sync.Once +} + +type pageContentInit_hugo5379 struct { + contentInit sync.Once // O3 + plainInit sync.Once // O4 +} + +type HugoSites_hugo5379 struct { + Sites []*Site_hugo5379 +} + +func (h *HugoSites_hugo5379) render() { + for _, s := range h.Sites { + for _, s2 := range h.Sites { + s2.preparePagesForRender() + } + s.renderPages() + } +} + +func (h *HugoSites_hugo5379) Build() { + h.render() +} + +type Pages_hugo5379 []*Page_hugo5379 + +type PageCollections_hugo5379 struct { + Pages Pages_hugo5379 +} + +type Site_hugo5379 struct { + *PageCollections_hugo5379 +} + +func (s *Site_hugo5379) preparePagesForRender() { + for _, p := range s.Pages { + p.setContentInit() + } +} + +func (s *Site_hugo5379) renderForLayouts() { + /// Omit reflections + for _, p := range s.Pages { + p.WordCount() + } +} + +func (s *Site_hugo5379) renderAndWritePage() { + s.renderForLayouts() +} + +func (s *Site_hugo5379) renderPages() { + numWorkers := 2 + wg := &sync.WaitGroup{} + + for i := 0; i < numWorkers; i++ { + wg.Add(1) + // deadlocks: x > 0 + go pageRenderer_hugo5379(s, wg) // G3 + } + + wg.Wait() +} + +type sitesBuilder_hugo5379 struct { + H *HugoSites_hugo5379 +} + +func (s *sitesBuilder_hugo5379) Build() *sitesBuilder_hugo5379 { + return s.build() +} + +func (s *sitesBuilder_hugo5379) build() *sitesBuilder_hugo5379 { + s.H.Build() + return s +} + +func (s *sitesBuilder_hugo5379) CreateSitesE() error { + sites, err := NewHugoSites_hugo5379() + if err != nil { + return err + } + s.H = sites + return nil +} + +func (s *sitesBuilder_hugo5379) CreateSites() *sitesBuilder_hugo5379 { + if err := s.CreateSitesE(); err != nil { + log.Fatalf("Failed to create sites: %s", err) + } + return s +} + +func newHugoSites_hugo5379(sites ...*Site_hugo5379) (*HugoSites_hugo5379, error) { + h := &HugoSites_hugo5379{Sites: sites} + return h, nil +} + +func newSite_hugo5379() *Site_hugo5379 { + c := &PageCollections_hugo5379{} + s := &Site_hugo5379{ + PageCollections_hugo5379: c, + } + return s +} + +func createSitesFromConfig_hugo5379() []*Site_hugo5379 { + var ( + sites []*Site_hugo5379 + ) + + var s *Site_hugo5379 = newSite_hugo5379() + sites = append(sites, s) + return sites +} + +func NewHugoSites_hugo5379() (*HugoSites_hugo5379, error) { + sites := createSitesFromConfig_hugo5379() + return newHugoSites_hugo5379(sites...) 
+} + +func prepareShortcodeForPage_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + m := make(map[int]func() error) + m[0] = func() error { + return renderShortcode_hugo5379(p) + } + return m +} + +func renderShortcode_hugo5379(p *PageWithoutContent_hugo5379) error { + return renderShortcodeWithPage_hugo5379(p) +} + +func renderShortcodeWithPage_hugo5379(p *PageWithoutContent_hugo5379) error { + /// Omit reflections + p.WordCount() + return nil +} + +func createShortcodeRenderers_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + return prepareShortcodeForPage_hugo5379(p) +} + +func newShortcodeHandler_hugo5379(p *Page_hugo5379) *shortcodeHandler_hugo5379 { + return &shortcodeHandler_hugo5379{ + p: p.withoutContent(), + contentShortcodes: make(map[int]func() error), + contentShortcodesDelta: make(map[int]func() error), + } +} + +func handleShortcodes_hugo5379(p *PageWithoutContent_hugo5379) error { + return p.shortcodeState.executeShortcodesForDelta(p) +} + +func pageRenderer_hugo5379(s *Site_hugo5379, wg *sync.WaitGroup) { + defer wg.Done() + s.renderAndWritePage() +} + +func Hugo5379() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + // deadlocks: x > 0 + b := &sitesBuilder_hugo5379{} + s := b.CreateSites() + for _, site := range s.H.Sites { + p := &Page_hugo5379{ + pageInit_hugo5379: &pageInit_hugo5379{}, + pageContentInit_hugo5379: &pageContentInit_hugo5379{}, + pageWithoutContent: &PageWithoutContent_hugo5379{}, + contentInit: sync.Once{}, + contentInitMu: sync.Mutex{}, + shortcodeState: nil, + } + p.shortcodeState = newShortcodeHandler_hugo5379(p) + site.Pages = append(site.Pages, p) + } + s.Build() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio16224.go b/src/runtime/testdata/testgoroutineleakgc/istio16224.go new file mode 100644 index 00000000000000..f3d4cfd1b1f6a4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio16224.go @@ -0,0 +1,125 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Istio16224", Istio16224) +} + +type ConfigStoreCache_istio16224 interface { + RegisterEventHandler(handler func()) + Run() +} + +type Event_istio16224 int + +type Handler_istio16224 func(Event_istio16224) + +type configstoreMonitor_istio16224 struct { + handlers []Handler_istio16224 + eventCh chan Event_istio16224 +} + +func (m *configstoreMonitor_istio16224) Run(stop <-chan struct{}) { + for { + select { + case <-stop: + // This bug is not descibed, but is a true positive (in our eyes) + // In a real run main exits when the goro is blocked here. 
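+			// eventCh is unbuffered and nothing sends on it after stop is closed,
+			// so this receive can block forever and the Run goroutine leaks.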
+ if _, ok := <-m.eventCh; ok { + close(m.eventCh) + } + return + case ce, ok := <-m.eventCh: + if ok { + m.processConfigEvent(ce) + } + } + } +} + +func (m *configstoreMonitor_istio16224) processConfigEvent(ce Event_istio16224) { + m.applyHandlers(ce) +} + +func (m *configstoreMonitor_istio16224) AppendEventHandler(h Handler_istio16224) { + m.handlers = append(m.handlers, h) +} + +func (m *configstoreMonitor_istio16224) applyHandlers(e Event_istio16224) { + for _, f := range m.handlers { + f(e) + } +} +func (m *configstoreMonitor_istio16224) ScheduleProcessEvent(configEvent Event_istio16224) { + m.eventCh <- configEvent +} + +type Monitor_istio16224 interface { + Run(<-chan struct{}) + AppendEventHandler(Handler_istio16224) + ScheduleProcessEvent(Event_istio16224) +} + +type controller_istio16224 struct { + monitor Monitor_istio16224 +} + +func (c *controller_istio16224) RegisterEventHandler(f func(Event_istio16224)) { + c.monitor.AppendEventHandler(f) +} + +func (c *controller_istio16224) Run(stop <-chan struct{}) { + c.monitor.Run(stop) +} + +func (c *controller_istio16224) Create() { + c.monitor.ScheduleProcessEvent(Event_istio16224(0)) +} + +func NewMonitor_istio16224() Monitor_istio16224 { + return NewBufferedMonitor_istio16224() +} + +func NewBufferedMonitor_istio16224() Monitor_istio16224 { + return &configstoreMonitor_istio16224{ + eventCh: make(chan Event_istio16224), + } +} + +func Istio16224() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + controller := &controller_istio16224{monitor: NewMonitor_istio16224()} + done := make(chan bool) + lock := sync.Mutex{} + controller.RegisterEventHandler(func(event Event_istio16224) { + lock.Lock() + defer lock.Unlock() + done <- true + }) + + stop := make(chan struct{}) + // deadlocks: x > 0 + go controller.Run(stop) + + controller.Create() + + lock.Lock() // blocks + lock.Unlock() + <-done + + close(stop) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio17860.go b/src/runtime/testdata/testgoroutineleakgc/istio17860.go new file mode 100644 index 00000000000000..a8aba84cd3f15f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio17860.go @@ -0,0 +1,139 @@ +package main + +import ( + "context" + "runtime" + + "sync" + "time" +) + +func init() { + register("Istio17860", Istio17860) +} + +type Proxy_istio17860 interface { + IsLive() bool +} + +type TestProxy_istio17860 struct { + live func() bool +} + +func (tp TestProxy_istio17860) IsLive() bool { + if tp.live == nil { + return true + } + return tp.live() +} + +type Agent_istio17860 interface { + Run(ctx context.Context) + Restart() +} + +type exitStatus_istio17860 int + +type agent_istio17860 struct { + proxy Proxy_istio17860 + mu *sync.Mutex + statusCh chan exitStatus_istio17860 + currentEpoch int + activeEpochs map[int]struct{} +} + +func (a *agent_istio17860) Run(ctx context.Context) { + for { + select { + case status := <-a.statusCh: + a.mu.Lock() + delete(a.activeEpochs, int(status)) + active := len(a.activeEpochs) + a.mu.Unlock() + if active == 0 { + return + } + case <-ctx.Done(): + return + } + } +} + +func (a *agent_istio17860) Restart() { + a.mu.Lock() + defer a.mu.Unlock() + + a.waitUntilLive() + a.currentEpoch++ + a.activeEpochs[a.currentEpoch] = struct{}{} + + // deadlocks: x > 0 + go a.runWait(a.currentEpoch) +} + +func (a *agent_istio17860) runWait(epoch int) { + a.statusCh <- exitStatus_istio17860(epoch) +} + +func (a *agent_istio17860) 
waitUntilLive() { + if len(a.activeEpochs) == 0 { + return + } + + interval := time.NewTicker(30 * time.Nanosecond) + timer := time.NewTimer(100 * time.Nanosecond) + defer func() { + interval.Stop() + timer.Stop() + }() + + if a.proxy.IsLive() { + return + } + + for { + select { + case <-timer.C: + return + case <-interval.C: + if a.proxy.IsLive() { + return + } + } + } +} + +func NewAgent_istio17860(proxy Proxy_istio17860) Agent_istio17860 { + return &agent_istio17860{ + proxy: proxy, + mu: &sync.Mutex{}, + statusCh: make(chan exitStatus_istio17860), + activeEpochs: make(map[int]struct{}), + } +} + +func Istio17860() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + neverLive := func() bool { + return false + } + + a := NewAgent_istio17860(TestProxy_istio17860{live: neverLive}) + go func() { a.Run(ctx) }() + + a.Restart() + go a.Restart() + + time.Sleep(200 * time.Nanosecond) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio18454.go b/src/runtime/testdata/testgoroutineleakgc/istio18454.go new file mode 100644 index 00000000000000..a795e7e8f0ff9a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio18454.go @@ -0,0 +1,149 @@ +package main + +import ( + "context" + "runtime" + + "sync" + "time" +) + +func init() { + register("Istio18454", Istio18454) +} + +const eventChCap_istio18454 = 1024 + +type Worker_istio18454 struct { + ctx context.Context + ctxCancel context.CancelFunc +} + +func (w *Worker_istio18454) Start(setupFn func(), runFn func(c context.Context)) { + if setupFn != nil { + setupFn() + } + go func() { + // deadlocks: x > 0 + runFn(w.ctx) + }() +} + +func (w *Worker_istio18454) Stop() { + w.ctxCancel() +} + +type Strategy_istio18454 struct { + timer *time.Timer + timerFrequency time.Duration + stateLock sync.Mutex + resetChan chan struct{} + worker *Worker_istio18454 + startTimerFn func() +} + +func (s *Strategy_istio18454) OnChange() { + s.stateLock.Lock() + if s.timer != nil { + s.stateLock.Unlock() + s.resetChan <- struct{}{} + return + } + s.startTimerFn() + s.stateLock.Unlock() +} + +func (s *Strategy_istio18454) startTimer() { + s.timer = time.NewTimer(s.timerFrequency) + eventLoop := func(ctx context.Context) { + for { + select { + case <-s.timer.C: + case <-s.resetChan: + if !s.timer.Stop() { + <-s.timer.C + } + s.timer.Reset(s.timerFrequency) + case <-ctx.Done(): + s.timer.Stop() + return + } + } + } + s.worker.Start(nil, eventLoop) +} + +func (s *Strategy_istio18454) Close() { + s.worker.Stop() +} + +type Event_istio18454 int + +type Processor_istio18454 struct { + stateStrategy *Strategy_istio18454 + worker *Worker_istio18454 + eventCh chan Event_istio18454 +} + +func (p *Processor_istio18454) processEvent() { + p.stateStrategy.OnChange() +} + +func (p *Processor_istio18454) Start() { + setupFn := func() { + for i := 0; i < eventChCap_istio18454; i++ { + p.eventCh <- Event_istio18454(0) + } + } + runFn := func(ctx context.Context) { + defer func() { + p.stateStrategy.Close() + }() + for { + select { + case <-ctx.Done(): + return + case <-p.eventCh: + p.processEvent() + } + } + } + p.worker.Start(setupFn, runFn) +} + +func (p *Processor_istio18454) Stop() { + p.worker.Stop() +} + +func NewWorker_istio18454() *Worker_istio18454 { + worker := &Worker_istio18454{} + worker.ctx, worker.ctxCancel = context.WithCancel(context.Background()) + return worker +} + +func Istio18454() { + defer 
func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + stateStrategy := &Strategy_istio18454{ + timerFrequency: time.Nanosecond, + resetChan: make(chan struct{}, 1), + worker: NewWorker_istio18454(), + } + stateStrategy.startTimerFn = stateStrategy.startTimer + + p := &Processor_istio18454{ + stateStrategy: stateStrategy, + worker: NewWorker_istio18454(), + eventCh: make(chan Event_istio18454, eventChCap_istio18454), + } + + p.Start() + defer p.Stop() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go new file mode 100644 index 00000000000000..eed829866640fd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go @@ -0,0 +1,97 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/10182 + * Buggy version: 4b990d128a17eea9058d28a3b3688ab8abafbd94 + * fix commit-id: 64ad3e17ad15cd0f9a4fd86706eec1c572033254 + * Flaky: 15/100 + * Description: + * This is a lock-channel bug. goroutine 1 is blocked on a lock + * held by goroutine 3, while goroutine 3 is blocked on sending + * message to ch, which is read by goroutine 1. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes10182", Kubernetes10182) +} + +type statusManager_kubernetes10182 struct { + podStatusesLock sync.RWMutex + podStatusChannel chan bool +} + +func (s *statusManager_kubernetes10182) Start() { + go func() { + // deadlocks: x > 0 + for i := 0; i < 2; i++ { + s.syncBatch() + } + }() +} + +func (s *statusManager_kubernetes10182) syncBatch() { + runtime.Gosched() + <-s.podStatusChannel + s.DeletePodStatus() +} + +func (s *statusManager_kubernetes10182) DeletePodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() +} + +func (s *statusManager_kubernetes10182) SetPodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() + s.podStatusChannel <- true +} + +func NewStatusManager_kubernetes10182() *statusManager_kubernetes10182 { + return &statusManager_kubernetes10182{ + podStatusChannel: make(chan bool), + } +} + +// Example of deadlock trace: +// +// G1 G2 G3 +// -------------------------------------------------------------------------------- +// s.Start() +// s.syncBatch() +// s.SetPodStatus() +// <-s.podStatusChannel +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Unlock() +// return +// s.DeletePodStatus() +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Lock() +// -----------------------------------G1,G3 leak------------------------------------- + +func Kubernetes10182() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: 0 + s := NewStatusManager_kubernetes10182() + // deadlocks: 0 + go s.Start() + // deadlocks: x > 0 + go s.SetPodStatus() + // deadlocks: x > 0 + go s.SetPodStatus() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go new file mode 100644 index 00000000000000..056495a6597b35 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go @@ -0,0 +1,114 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes11298", Kubernetes11298) +} + +type Signal_kubernetes11298 <-chan struct{} + +func 
After_kubernetes11298(f func()) Signal_kubernetes11298 { + ch := make(chan struct{}) + go func() { + // deadlocks: x > 0 + defer close(ch) + if f != nil { + f() + } + }() + return Signal_kubernetes11298(ch) +} + +func Until_kubernetes11298(f func(), period time.Duration, stopCh <-chan struct{}) { + if f == nil { + return + } + for { + select { + case <-stopCh: + return + default: + } + f() + select { + case <-stopCh: + case <-time.After(period): + } + } + +} + +type notifier_kubernetes11298 struct { + lock sync.Mutex + cond *sync.Cond +} + +// abort will be closed no matter what +func (n *notifier_kubernetes11298) serviceLoop(abort <-chan struct{}) { + n.lock.Lock() + defer n.lock.Unlock() + for { + select { + case <-abort: + return + default: + ch := After_kubernetes11298(func() { + n.cond.Wait() + }) + select { + case <-abort: + n.cond.Signal() + <-ch + return + case <-ch: + } + } + } +} + +// abort will be closed no matter what +func Notify_kubernetes11298(abort <-chan struct{}) { + n := ¬ifier_kubernetes11298{} + n.cond = sync.NewCond(&n.lock) + finished := After_kubernetes11298(func() { + Until_kubernetes11298(func() { + for { + select { + case <-abort: + return + default: + func() { + n.lock.Lock() + defer n.lock.Unlock() + n.cond.Signal() + }() + } + } + }, 0, abort) + }) + Until_kubernetes11298(func() { n.serviceLoop(finished) }, 0, abort) +} +func Kubernetes11298() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + done := make(chan struct{}) + notifyDone := After_kubernetes11298(func() { Notify_kubernetes11298(done) }) + go func() { + defer close(done) + time.Sleep(300 * time.Nanosecond) + }() + <-notifyDone + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go new file mode 100644 index 00000000000000..d0f445a0583fe2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -0,0 +1,183 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/13135 + * Buggy version: 6ced66249d4fd2a81e86b4a71d8df0139fe5ceae + * fix commit-id: a12b7edc42c5c06a2e7d9f381975658692951d5a + * Flaky: 93/100 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes13135", Kubernetes13135) +} + +var ( + StopChannel_kubernetes13135 chan struct{} +) + +func Util_kubernetes13135(f func(), period time.Duration, stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + f() + }() + time.Sleep(period) + } +} + +type Store_kubernetes13135 interface { + Add(obj interface{}) + Replace(obj interface{}) +} + +type Reflector_kubernetes13135 struct { + store Store_kubernetes13135 +} + +func (r *Reflector_kubernetes13135) ListAndWatch(stopCh <-chan struct{}) error { + r.syncWith() + return nil +} + +func NewReflector_kubernetes13135(store Store_kubernetes13135) *Reflector_kubernetes13135 { + return &Reflector_kubernetes13135{ + store: store, + } +} + +func (r *Reflector_kubernetes13135) syncWith() { + r.store.Replace(nil) +} + +type Cacher_kubernetes13135 struct { + sync.Mutex + initialized sync.WaitGroup + initOnce sync.Once + watchCache *WatchCache_kubernetes13135 + reflector *Reflector_kubernetes13135 +} + +func (c *Cacher_kubernetes13135) processEvent() { + c.Lock() + defer c.Unlock() +} + +func (c *Cacher_kubernetes13135) startCaching(stopChannel <-chan struct{}) { + c.Lock() + for 
{ + err := c.reflector.ListAndWatch(stopChannel) + if err == nil { + break + } + } +} + +type WatchCache_kubernetes13135 struct { + sync.RWMutex + onReplace func() + onEvent func() +} + +func (w *WatchCache_kubernetes13135) SetOnEvent(onEvent func()) { + w.Lock() + defer w.Unlock() + w.onEvent = onEvent +} + +func (w *WatchCache_kubernetes13135) SetOnReplace(onReplace func()) { + w.Lock() + defer w.Unlock() + w.onReplace = onReplace +} + +func (w *WatchCache_kubernetes13135) processEvent() { + w.Lock() + defer w.Unlock() + if w.onEvent != nil { + w.onEvent() + } +} + +func (w *WatchCache_kubernetes13135) Add(obj interface{}) { + w.processEvent() +} + +func (w *WatchCache_kubernetes13135) Replace(obj interface{}) { + w.Lock() + defer w.Unlock() + if w.onReplace != nil { + w.onReplace() + } +} + +func NewCacher_kubernetes13135() *Cacher_kubernetes13135 { + watchCache := &WatchCache_kubernetes13135{} + cacher := &Cacher_kubernetes13135{ + initialized: sync.WaitGroup{}, + watchCache: watchCache, + reflector: NewReflector_kubernetes13135(watchCache), + } + cacher.initialized.Add(1) + watchCache.SetOnReplace(func() { + cacher.initOnce.Do(func() { cacher.initialized.Done() }) + cacher.Unlock() + }) + watchCache.SetOnEvent(cacher.processEvent) + stopCh := StopChannel_kubernetes13135 + go Util_kubernetes13135(func() { cacher.startCaching(stopCh) }, 0, stopCh) // G2 + cacher.initialized.Wait() + return cacher +} + +/// +/// G1 G2 G3 +/// NewCacher() +/// watchCache.SetOnReplace() +/// watchCache.SetOnEvent() +/// cacher.startCaching() +/// c.Lock() +/// c.reflector.ListAndWatch() +/// r.syncWith() +/// r.store.Replace() +/// w.Lock() +/// w.onReplace() +/// cacher.initOnce.Do() +/// cacher.Unlock() +/// return cacher +/// c.watchCache.Add() +/// w.processEvent() +/// w.Lock() +/// cacher.startCaching() +/// c.Lock() +/// ... +/// c.Lock() +/// w.Lock() +///--------------------------------G2,G3 deadlock------------------------------------- +/// + +func Kubernetes13135() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + StopChannel_kubernetes13135 = make(chan struct{}) + for i := 0; i < 50; i++ { + go func() { + // deadlocks: x > 0 + c := NewCacher_kubernetes13135() // G1 + go c.watchCache.Add(nil) // G3 + }() + } + go close(StopChannel_kubernetes13135) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go new file mode 100644 index 00000000000000..4ca72c65ddb045 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go @@ -0,0 +1,121 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/1321 + * Buggy version: 9cd0fc70f1ca852c903b18b0933991036b3b2fa1 + * fix commit-id: 435e0b73bb99862f9dedf56a50260ff3dfef14ff + * Flaky: 1/100 + * Description: + * This is a lock-channel bug. The first goroutine invokes + * distribute() function. distribute() function holds m.lock.Lock(), + * while blocking at sending message to w.result. The second goroutine + * invokes stopWatching() funciton, which can unblock the first + * goroutine by closing w.result. However, in order to close w.result, + * stopWatching() function needs to acquire m.lock.Lock() firstly. + * The fix is to introduce another channel and put receive message + * from the second channel in the same select as the w.result. Close + * the second channel can unblock the first goroutine, while no need + * to hold m.lock.Lock(). 
+ */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes1321", Kubernetes1321) +} + +type muxWatcher_kubernetes1321 struct { + result chan struct{} + m *Mux_kubernetes1321 + id int64 +} + +func (mw *muxWatcher_kubernetes1321) Stop() { + mw.m.stopWatching(mw.id) +} + +type Mux_kubernetes1321 struct { + lock sync.Mutex + watchers map[int64]*muxWatcher_kubernetes1321 +} + +func NewMux_kubernetes1321() *Mux_kubernetes1321 { + m := &Mux_kubernetes1321{ + watchers: map[int64]*muxWatcher_kubernetes1321{}, + } + // deadlocks: x > 0 + go m.loop() // G2 + return m +} + +func (m *Mux_kubernetes1321) Watch() *muxWatcher_kubernetes1321 { + mw := &muxWatcher_kubernetes1321{ + result: make(chan struct{}), + m: m, + id: int64(len(m.watchers)), + } + m.watchers[mw.id] = mw + runtime.Gosched() + return mw +} + +func (m *Mux_kubernetes1321) loop() { + for i := 0; i < 100; i++ { + m.distribute() + } +} + +func (m *Mux_kubernetes1321) distribute() { + m.lock.Lock() + defer m.lock.Unlock() + for _, w := range m.watchers { + w.result <- struct{}{} + runtime.Gosched() + } +} + +func (m *Mux_kubernetes1321) stopWatching(id int64) { + m.lock.Lock() + defer m.lock.Unlock() + w, ok := m.watchers[id] + if !ok { + return + } + delete(m.watchers, id) + close(w.result) +} + +func testMuxWatcherClose_kubernetes1321() { + // deadlocks: x > 0 + m := NewMux_kubernetes1321() + m.watchers[m.Watch().id].Stop() +} + +/// +/// G1 G2 +/// testMuxWatcherClose() +/// NewMux() +/// m.loop() +/// m.distribute() +/// m.lock.Lock() +/// w.result <- true +/// w := m.Watch() +/// w.Stop() +/// mw.m.stopWatching() +/// m.lock.Lock() +/// ---------------G1,G2 deadlock--------------- +/// + +func Kubernetes1321() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go testMuxWatcherClose_kubernetes1321() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go new file mode 100644 index 00000000000000..e393ce9d5e0a40 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -0,0 +1,81 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/25331 + * Buggy version: 5dd087040bb13434f1ddf2f0693d0203c30f28cb + * fix commit-id: 97f4647dc3d8cf46c2b66b89a31c758a6edfb57c + * Flaky: 100/100 + * Description: + * In reflector.go, it could probably call Stop() without retrieving + * all results from ResultChan(). See here. A potential leak is that + * when an error has happened, it could block on resultChan, and then + * cancelling context in Stop() wouldn't unblock it. 
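 + * One hedged way to avoid this kind of leak (a sketch, not necessarily the + * actual patch) is to make the result send abortable by the context: + * + * select { + * case wc.resultChan <- errResult: + * case <-wc.ctx.Done(): + * } + *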
+ */ +package main + +import ( + "context" + "errors" + "runtime" + "time" +) + +func init() { + register("Kubernetes25331", Kubernetes25331) +} + +type watchChan_kubernetes25331 struct { + ctx context.Context + cancel context.CancelFunc + resultChan chan bool + errChan chan error +} + +func (wc *watchChan_kubernetes25331) Stop() { + wc.errChan <- errors.New("Error") + wc.cancel() +} + +func (wc *watchChan_kubernetes25331) run() { + select { + case err := <-wc.errChan: + errResult := len(err.Error()) != 0 + wc.cancel() // Removed in fix + wc.resultChan <- errResult + case <-wc.ctx.Done(): + } +} + +func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { + ctx, cancel := context.WithCancel(context.Background()) + return &watchChan_kubernetes25331{ + ctx: ctx, + cancel: cancel, + resultChan: make(chan bool), + errChan: make(chan error), + } +} + +/// +/// G1 G2 +/// wc.run() +/// wc.Stop() +/// wc.errChan <- +/// wc.cancel() +/// <-wc.errChan +/// wc.cancel() +/// wc.resultChan <- +/// -------------G1 leak---------------- +/// + +func Kubernetes25331() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + wc := NewWatchChan_kubernetes25331() + // deadlocks: 1 + go wc.run() // G1 + go wc.Stop() // G2 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go new file mode 100644 index 00000000000000..cf1b79cd57d8e5 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go @@ -0,0 +1,83 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes26980", Kubernetes26980) +} + +type processorListener_kubernetes26980 struct { + lock sync.RWMutex + cond sync.Cond + + pendingNotifications []interface{} +} + +func (p *processorListener_kubernetes26980) add(notification interface{}) { + p.lock.Lock() + defer p.lock.Unlock() + + p.pendingNotifications = append(p.pendingNotifications, notification) + p.cond.Broadcast() +} + +func (p *processorListener_kubernetes26980) pop(stopCh <-chan struct{}) { + p.lock.Lock() + runtime.Gosched() + defer p.lock.Unlock() + for { + for len(p.pendingNotifications) == 0 { + select { + case <-stopCh: + return + default: + } + p.cond.Wait() + } + select { + case <-stopCh: + return + } + } +} + +func newProcessListener_kubernetes26980() *processorListener_kubernetes26980 { + ret := &processorListener_kubernetes26980{ + pendingNotifications: []interface{}{}, + } + ret.cond.L = &ret.lock + return ret +} +func Kubernetes26980() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 3000; i++ { + go func() { + // deadlocks: x > 0 + pl := newProcessListener_kubernetes26980() + stopCh := make(chan struct{}) + defer close(stopCh) + pl.add(1) + runtime.Gosched() + // deadlocks: x > 0 + go pl.pop(stopCh) + + resultCh := make(chan struct{}) + go func() { + // deadlocks: x > 0 + pl.lock.Lock() + close(resultCh) + }() + runtime.Gosched() + <-resultCh + pl.lock.Unlock() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go new file mode 100644 index 00000000000000..4b306e40794337 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go @@ -0,0 +1,256 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes30872", Kubernetes30872) +} + +type PopProcessFunc_kubernetes30872 func() + +type 
ProcessFunc_kubernetes30872 func() + +func Util_kubernetes30872(f func(), stopCh <-chan struct{}) { + JitterUntil_kubernetes30872(f, stopCh) +} + +func JitterUntil_kubernetes30872(f func(), stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + f() + }() + } +} + +type Queue_kubernetes30872 interface { + HasSynced() + Pop(PopProcessFunc_kubernetes30872) +} + +type Config_kubernetes30872 struct { + Queue Queue_kubernetes30872 + Process ProcessFunc_kubernetes30872 +} + +type Controller_kubernetes30872 struct { + config Config_kubernetes30872 +} + +func (c *Controller_kubernetes30872) Run(stopCh <-chan struct{}) { + Util_kubernetes30872(c.processLoop, stopCh) +} + +func (c *Controller_kubernetes30872) HasSynced() { + c.config.Queue.HasSynced() +} + +func (c *Controller_kubernetes30872) processLoop() { + c.config.Queue.Pop(PopProcessFunc_kubernetes30872(c.config.Process)) +} + +type ControllerInterface_kubernetes30872 interface { + Run(<-chan struct{}) + HasSynced() +} + +type ResourceEventHandler_kubernetes30872 interface { + OnAdd() +} + +type ResourceEventHandlerFuncs_kubernetes30872 struct { + AddFunc func() +} + +func (r ResourceEventHandlerFuncs_kubernetes30872) OnAdd() { + if r.AddFunc != nil { + r.AddFunc() + } +} + +type informer_kubernetes30872 struct { + controller ControllerInterface_kubernetes30872 + + stopChan chan struct{} +} + +type federatedInformerImpl_kubernetes30872 struct { + sync.Mutex + clusterInformer informer_kubernetes30872 +} + +func (f *federatedInformerImpl_kubernetes30872) ClustersSynced() { + f.Lock() // L1 + defer f.Unlock() + f.clusterInformer.controller.HasSynced() +} + +func (f *federatedInformerImpl_kubernetes30872) addCluster() { + f.Lock() // L1 + defer f.Unlock() +} + +func (f *federatedInformerImpl_kubernetes30872) Start() { + f.Lock() // L1 + defer f.Unlock() + + f.clusterInformer.stopChan = make(chan struct{}) + // deadlocks: x > 0 + go f.clusterInformer.controller.Run(f.clusterInformer.stopChan) // G2 + runtime.Gosched() +} + +func (f *federatedInformerImpl_kubernetes30872) Stop() { + f.Lock() // L1 + defer f.Unlock() + close(f.clusterInformer.stopChan) +} + +type DelayingDeliverer_kubernetes30872 struct{} + +func (d *DelayingDeliverer_kubernetes30872) StartWithHandler(handler func()) { + go func() { // G4 + // deadlocks: x > 0 + handler() + }() +} + +type FederationView_kubernetes30872 interface { + ClustersSynced() +} + +type FederatedInformer_kubernetes30872 interface { + FederationView_kubernetes30872 + Start() + Stop() +} + +type NamespaceController_kubernetes30872 struct { + namespaceDeliverer *DelayingDeliverer_kubernetes30872 + namespaceFederatedInformer FederatedInformer_kubernetes30872 +} + +func (nc *NamespaceController_kubernetes30872) isSynced() { + nc.namespaceFederatedInformer.ClustersSynced() +} + +func (nc *NamespaceController_kubernetes30872) reconcileNamespace() { + nc.isSynced() +} + +func (nc *NamespaceController_kubernetes30872) Run(stopChan <-chan struct{}) { + nc.namespaceFederatedInformer.Start() + go func() { // G3 + // deadlocks: x > 0 + <-stopChan + nc.namespaceFederatedInformer.Stop() + }() + nc.namespaceDeliverer.StartWithHandler(func() { + nc.reconcileNamespace() + }) +} + +type DeltaFIFO_kubernetes30872 struct { + lock sync.RWMutex +} + +func (f *DeltaFIFO_kubernetes30872) HasSynced() { + f.lock.Lock() // L2 + defer f.lock.Unlock() +} + +func (f *DeltaFIFO_kubernetes30872) Pop(process PopProcessFunc_kubernetes30872) { + f.lock.Lock() // L2 + defer f.lock.Unlock() + process() 
+} + +func NewFederatedInformer_kubernetes30872() FederatedInformer_kubernetes30872 { + federatedInformer := &federatedInformerImpl_kubernetes30872{} + federatedInformer.clusterInformer.controller = NewInformer_kubernetes30872( + ResourceEventHandlerFuncs_kubernetes30872{ + AddFunc: func() { + federatedInformer.addCluster() + }, + }) + return federatedInformer +} + +func NewInformer_kubernetes30872(h ResourceEventHandler_kubernetes30872) *Controller_kubernetes30872 { + fifo := &DeltaFIFO_kubernetes30872{} + cfg := &Config_kubernetes30872{ + Queue: fifo, + Process: func() { + h.OnAdd() + }, + } + return &Controller_kubernetes30872{config: *cfg} +} + +func NewNamespaceController_kubernetes30872() *NamespaceController_kubernetes30872 { + nc := &NamespaceController_kubernetes30872{} + nc.namespaceDeliverer = &DelayingDeliverer_kubernetes30872{} + nc.namespaceFederatedInformer = NewFederatedInformer_kubernetes30872() + return nc +} + +func Kubernetes30872() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + namespaceController := NewNamespaceController_kubernetes30872() + stop := make(chan struct{}) + namespaceController.Run(stop) + close(stop) + }() + } +} + +/// Example of deadlocking trace. +/// +/// G1 G2 G3 G4 +/// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +/// namespaceController.Run() +/// nc.namespaceFederatedInformer.Start() +/// f.Lock() [L1] +/// go f.clusterInformer.controller.Run()[G2] +/// <<>> +/// . Util(c.processLoop, stopCh) +/// . c.config.Queue.Pop() +/// . f.lock.Lock() [L2] +/// . process() +/// . h.OnAdd() +/// . r.AddFunc() +/// . federatedInformer.addCluster() +/// . f.Lock() [L1] +/// f.Unlock() [L1] . +/// go func()[G3] . +/// nc.namespaceDeliverer.StartWithHandler() . . +/// go func()[G4] . . +/// close(stop) . . . +/// <<>> . . . +/// . <-stopChan . +/// . nc.namespaceFederatedInformer.Stop() . +/// . f.Lock() [L1] . +/// . . handler() +/// . . nc.reconcileNamespace() +/// . . nc.isSynced() +/// . . nc.namespaceFederatedInformer.ClustersSynced() +/// . . f.Lock() [L1] +/// . . f.clusterInformer.controller.HasSynced() +/// . . c.config.Queue.HasSynced() +/// . . 
f.lock.Lock() [L2] +///----------------------------------------------------------------------------G2,G3,G4 leak---------------------------------------------------------------------------------------------- +/// diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go new file mode 100644 index 00000000000000..b204230c29a3a5 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -0,0 +1,75 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes38669", Kubernetes38669) +} + +type Event_kubernetes38669 int +type watchCacheEvent_kubernetes38669 int + +type cacheWatcher_kubernetes38669 struct { + sync.Mutex + input chan watchCacheEvent_kubernetes38669 + result chan Event_kubernetes38669 + stopped bool +} + +func (c *cacheWatcher_kubernetes38669) process(initEvents []watchCacheEvent_kubernetes38669) { + for _, event := range initEvents { + c.sendWatchCacheEvent(&event) + } + defer close(c.result) + defer c.Stop() + for { + _, ok := <-c.input + if !ok { + return + } + } +} + +func (c *cacheWatcher_kubernetes38669) sendWatchCacheEvent(event *watchCacheEvent_kubernetes38669) { + c.result <- Event_kubernetes38669(*event) +} + +func (c *cacheWatcher_kubernetes38669) Stop() { + c.stop() +} + +func (c *cacheWatcher_kubernetes38669) stop() { + c.Lock() + defer c.Unlock() + if !c.stopped { + c.stopped = true + close(c.input) + } +} + +func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_kubernetes38669) *cacheWatcher_kubernetes38669 { + watcher := &cacheWatcher_kubernetes38669{ + input: make(chan watchCacheEvent_kubernetes38669, chanSize), + result: make(chan Event_kubernetes38669, chanSize), + stopped: false, + } + // deadlocks: 1 + go watcher.process(initEvents) + return watcher +} + +func Kubernetes38669() { + defer func() { + time.Sleep(1 * time.Second) + runtime.GC() + }() + go func() { + initEvents := []watchCacheEvent_kubernetes38669{1, 2} + w := newCacheWatcher_kubernetes38669(0, initEvents) + w.Stop() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go new file mode 100644 index 00000000000000..540988ba774c66 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -0,0 +1,69 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/5316 + * Buggy version: c868b0bbf09128960bc7c4ada1a77347a464d876 + * fix commit-id: cc3a433a7abc89d2f766d4c87eaae9448e3dc091 + * Flaky: 100/100 + * Description: + * If the main goroutine selects a case that doesn’t consumes + * the channels, the anonymous goroutine will be blocked on sending + * to channel. 
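 + * A common mitigation (sketch; the upstream patch may differ) is to give + * both channels a buffer of one so the sender can always complete: + * + * ch := make(chan bool, 1) + * errCh := make(chan error, 1) + *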
+ */ + +package main + +import ( + "errors" + "math/rand" + "runtime" + "time" +) + +func init() { + register("Kubernetes5316", Kubernetes5316) +} + +func finishRequest_kubernetes5316(timeout time.Duration, fn func() error) { + ch := make(chan bool) + errCh := make(chan error) + go func() { // G2 + // deadlocks: 1 + if err := fn(); err != nil { + errCh <- err + } else { + ch <- true + } + }() + + select { + case <-ch: + case <-errCh: + case <-time.After(timeout): + } +} + +/// +/// G1 G2 +/// finishRequest() +/// fn() +/// time.After() +/// errCh<-/ch<- +/// --------------G2 leak---------------- +/// + +func Kubernetes5316() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + fn := func() error { + time.Sleep(2 * time.Millisecond) + if rand.Intn(10) > 5 { + return errors.New("Error") + } + return nil + } + go finishRequest_kubernetes5316(time.Millisecond, fn) // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go new file mode 100644 index 00000000000000..9e328fc5330e2f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go @@ -0,0 +1,134 @@ +/* + * Project: kubernetes + * Tag: Reproduce misbehavior + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/58107 + * Buggy version: 2f17d782eb2772d6401da7ddced9ac90656a7a79 + * fix commit-id: 010a127314a935d8d038f8dd4559fc5b249813e4 + * Flaky: 53/100 + * Description: + * The rules for read and write lock: allows concurrent read lock; + * write lock has higher priority than read lock. + * There are two queues (queue 1 and queue 2) involved in this bug, + * and the two queues are protected by the same read-write lock + * (rq.workerLock.RLock()). Before getting an element from queue 1 or + * queue 2, rq.workerLock.RLock() is acquired. If the queue is empty, + * cond.Wait() will be invoked. There is another goroutine (goroutine D), + * which will periodically invoke rq.workerLock.Lock(). Under the following + * situation, deadlock will happen. Queue 1 is empty, so that some goroutines + * hold rq.workerLock.RLock(), and block at cond.Wait(). Goroutine D is + * blocked when acquiring rq.workerLock.Lock(). Some goroutines try to process + * jobs in queue 2, but they are blocked when acquiring rq.workerLock.RLock(), + * since write lock has a higher priority. + * The fix is to not acquire rq.workerLock.RLock(), while pulling data + * from any queue. Therefore, when a goroutine is blocked at cond.Wait(), + * rq.workLock.RLock() is not held. 
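 + * Sketch of that fix idea (illustrative, using the names in this file): + * wait for work before taking the read lock, and hold the lock only around + * the actual processing: + * + * workFunc := func() bool { + * queue.Get() // may block in cond.Wait without holding workerLock + * rq.workerLock.RLock() + * defer rq.workerLock.RUnlock() + * return true + * } + *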
+ */ + +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes58107", Kubernetes58107) +} + +type RateLimitingInterface_kubernetes58107 interface { + Get() + Put() +} + +type Type_kubernetes58107 struct { + cond *sync.Cond +} + +func (q *Type_kubernetes58107) Get() { + q.cond.L.Lock() + defer q.cond.L.Unlock() + q.cond.Wait() +} + +func (q *Type_kubernetes58107) Put() { + q.cond.Signal() +} + +type ResourceQuotaController_kubernetes58107 struct { + workerLock sync.RWMutex + queue RateLimitingInterface_kubernetes58107 + missingUsageQueue RateLimitingInterface_kubernetes58107 +} + +func (rq *ResourceQuotaController_kubernetes58107) worker(queue RateLimitingInterface_kubernetes58107, _ string) { + workFunc := func() bool { + rq.workerLock.RLock() + defer rq.workerLock.RUnlock() + queue.Get() + return true + } + for { + if quit := workFunc(); quit { + return + } + } +} + +func (rq *ResourceQuotaController_kubernetes58107) Run() { + // deadlocks: x > 0 + go rq.worker(rq.queue, "G1") // G3 + // deadlocks: x > 0 + go rq.worker(rq.missingUsageQueue, "G2") // G4 +} + +func (rq *ResourceQuotaController_kubernetes58107) Sync() { + for i := 0; i < 100000; i++ { + rq.workerLock.Lock() + runtime.Gosched() + rq.workerLock.Unlock() + } +} + +func (rq *ResourceQuotaController_kubernetes58107) HelperSignals() { + for i := 0; i < 100000; i++ { + rq.queue.Put() + rq.missingUsageQueue.Put() + } +} + +func startResourceQuotaController_kubernetes58107() { + resourceQuotaController := &ResourceQuotaController_kubernetes58107{ + queue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + missingUsageQueue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + } + + go resourceQuotaController.Run() // G2 + // deadlocks: x > 0 + go resourceQuotaController.Sync() // G5 + resourceQuotaController.HelperSignals() +} + +func Kubernetes58107() { + defer func() { + time.Sleep(1000 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go startResourceQuotaController_kubernetes58107() // G1 + } +} + +// Example of deadlock: +// +// G1 G3 G4 G5 +// ------------------------------------------------------------------------------------------------------------ +// <<>> (no more signals) ... ... Sync() +// rq.workerLock.RLock() . . +// q.cond.L.Lock() . . +// q.cond.Wait() . . +// . . rq.workerLock.Lock() +// . rq.workerLock.RLock() . +// . q.cond.L.Lock() . +// --------------------------------------------G3, G4, G5 leak------------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go new file mode 100644 index 00000000000000..48bcb7efe6fe95 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go @@ -0,0 +1,119 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/62464 + * Buggy version: a048ca888ad27367b1a7b7377c67658920adbf5d + * fix commit-id: c1b19fce903675b82e9fdd1befcc5f5d658bfe78 + * Flaky: 8/100 + * Description: + * This is another example for recursive read lock bug. It has + * been noticed by the go developers that RLock should not be + * recursively used in the same thread. 
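 + * The usual remedy (a sketch; helper names are hypothetical) is to have the + * locking entry point call unexported variants that do not re-acquire RLock: + * + * func (s *stateMemory) GetCPUSetOrDefault() { + * s.RLock() + * defer s.RUnlock() + * if ok := s.getCPUSetLocked(); ok { // no locking inside + * return + * } + * s.getDefaultCPUSetLocked() // no locking inside + * } + *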
+ */ + +package main + +import ( + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes62464", Kubernetes62464) +} + +type State_kubernetes62464 interface { + GetCPUSetOrDefault() + GetCPUSet() bool + GetDefaultCPUSet() + SetDefaultCPUSet() +} + +type stateMemory_kubernetes62464 struct { + sync.RWMutex +} + +func (s *stateMemory_kubernetes62464) GetCPUSetOrDefault() { + s.RLock() + defer s.RUnlock() + if ok := s.GetCPUSet(); ok { + return + } + s.GetDefaultCPUSet() +} + +func (s *stateMemory_kubernetes62464) GetCPUSet() bool { + runtime.Gosched() + s.RLock() + defer s.RUnlock() + + if rand.Intn(10) > 5 { + return true + } + return false +} + +func (s *stateMemory_kubernetes62464) GetDefaultCPUSet() { + s.RLock() + defer s.RUnlock() +} + +func (s *stateMemory_kubernetes62464) SetDefaultCPUSet() { + s.Lock() + runtime.Gosched() + defer s.Unlock() +} + +type staticPolicy_kubernetes62464 struct{} + +func (p *staticPolicy_kubernetes62464) RemoveContainer(s State_kubernetes62464) { + s.GetDefaultCPUSet() + s.SetDefaultCPUSet() +} + +type manager_kubernetes62464 struct { + state *stateMemory_kubernetes62464 +} + +func (m *manager_kubernetes62464) reconcileState() { + m.state.GetCPUSetOrDefault() +} + +func NewPolicyAndManager_kubernetes62464() (*staticPolicy_kubernetes62464, *manager_kubernetes62464) { + s := &stateMemory_kubernetes62464{} + m := &manager_kubernetes62464{s} + p := &staticPolicy_kubernetes62464{} + return p, m +} + +/// +/// G1 G2 +/// m.reconcileState() +/// m.state.GetCPUSetOrDefault() +/// s.RLock() +/// s.GetCPUSet() +/// p.RemoveContainer() +/// s.GetDefaultCPUSet() +/// s.SetDefaultCPUSet() +/// s.Lock() +/// s.RLock() +/// ---------------------G1,G2 deadlock--------------------- +/// + +func Kubernetes62464() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + p, m := NewPolicyAndManager_kubernetes62464() + // deadlocks: x > 0 + go m.reconcileState() + // deadlocks: x > 0 + go p.RemoveContainer(m.state) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go new file mode 100644 index 00000000000000..26fc0f3db03596 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go @@ -0,0 +1,99 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/6632 + * Buggy version: e597b41d939573502c8dda1dde7bf3439325fb5d + * fix commit-id: 82afb7ab1fe12cf2efceede2322d082eaf5d5adc + * Flaky: 4/100 + * Description: + * This is a lock-channel bug. When resetChan is full, WriteFrame + * holds the lock and blocks on the channel. Then monitor() fails + * to close the resetChan because lock is already held by WriteFrame. 
+ * Fix: create a goroutine to drain the channel + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes6632", Kubernetes6632) +} + +type Connection_kubernetes6632 struct { + closeChan chan bool +} + +type idleAwareFramer_kubernetes6632 struct { + resetChan chan bool + writeLock sync.Mutex + conn *Connection_kubernetes6632 +} + +func (i *idleAwareFramer_kubernetes6632) monitor() { + var resetChan = i.resetChan +Loop: + for { + select { + case <-i.conn.closeChan: + i.writeLock.Lock() + close(resetChan) + i.resetChan = nil + i.writeLock.Unlock() + break Loop + } + } +} + +func (i *idleAwareFramer_kubernetes6632) WriteFrame() { + i.writeLock.Lock() + defer i.writeLock.Unlock() + if i.resetChan == nil { + return + } + i.resetChan <- true +} + +func NewIdleAwareFramer_kubernetes6632() *idleAwareFramer_kubernetes6632 { + return &idleAwareFramer_kubernetes6632{ + resetChan: make(chan bool), + conn: &Connection_kubernetes6632{ + closeChan: make(chan bool), + }, + } +} + +/// +/// G1 G2 helper goroutine +/// i.monitor() +/// <-i.conn.closeChan +/// i.WriteFrame() +/// i.writeLock.Lock() +/// i.resetChan <- +/// i.conn.closeChan<- +/// i.writeLock.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Kubernetes6632() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + i := NewIdleAwareFramer_kubernetes6632() + + go func() { // helper goroutine + i.conn.closeChan <- true + }() + // deadlocks: x > 0 + go i.monitor() // G1 + // deadlocks: x > 0 + go i.WriteFrame() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go new file mode 100644 index 00000000000000..234f4145695a8c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go @@ -0,0 +1,92 @@ +package main + +import ( + "runtime" + "time" +) + +func init() { + register("Kubernetes70277", Kubernetes70277) +} + +type WaitFunc_kubernetes70277 func(done <-chan struct{}) <-chan struct{} + +type ConditionFunc_kubernetes70277 func() (done bool, err error) + +func WaitFor_kubernetes70277(wait WaitFunc_kubernetes70277, fn ConditionFunc_kubernetes70277, done <-chan struct{}) error { + c := wait(done) + for { + _, open := <-c + ok, err := fn() + if err != nil { + return err + } + if ok { + return nil + } + if !open { + break + } + } + return nil +} + +func poller_kubernetes70277(interval, timeout time.Duration) WaitFunc_kubernetes70277 { + return WaitFunc_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + + tick := time.NewTicker(interval) + defer tick.Stop() + + var after <-chan time.Time + if timeout != 0 { + timer := time.NewTimer(timeout) + after = timer.C + defer timer.Stop() + } + for { + select { + case <-tick.C: + select { + case ch <- struct{}{}: + default: + } + case <-after: + return + case <-done: + return + } + } + }() + + return ch + }) +} + +func Kubernetes70277() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + stopCh := make(chan struct{}) + defer close(stopCh) + waitFunc := poller_kubernetes70277(time.Millisecond, 80*time.Millisecond) + var doneCh <-chan struct{} + + WaitFor_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + doneCh = done + return waitFunc(done) + }, func() (bool, error) { + 
time.Sleep(10 * time.Millisecond) + return true, nil + }, stopCh) + + <-doneCh // block here + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/main.go b/src/runtime/testdata/testgoroutineleakgc/main.go new file mode 100644 index 00000000000000..ae491a2a978043 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/main.go @@ -0,0 +1,35 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "os" + +var cmds = map[string]func(){} + +func register(name string, f func()) { + if cmds[name] != nil { + panic("duplicate registration: " + name) + } + cmds[name] = f +} + +func registerInit(name string, f func()) { + if len(os.Args) >= 2 && os.Args[1] == name { + f() + } +} + +func main() { + if len(os.Args) < 2 { + println("usage: " + os.Args[0] + " name-of-test") + return + } + f := cmds[os.Args[1]] + if f == nil { + println("unknown function: " + os.Args[1]) + return + } + f() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby17176.go b/src/runtime/testdata/testgoroutineleakgc/moby17176.go new file mode 100644 index 00000000000000..e3cc10a2276da6 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby17176.go @@ -0,0 +1,74 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/17176 + * Buggy version: d295dc66521e2734390473ec1f1da8a73ad3288a + * fix commit-id: 2f16895ee94848e2d8ad72bc01968b4c88d84cb8 + * Flaky: 100/100 + * Description: + * devices.nrDeletedDevices takes devices.Lock() but does + * not drop it if there are no deleted devices. This will block + * other goroutines trying to acquire devices.Lock(). + * The general reason is that device deletion/deactivation may be + * retried in a loop, and during that time we don't want to block the + * rest of the device operations, so the inner devices lock is dropped + * while the per-device lock is still held. + * A test is added for this bug, and we need to check whether + * this bug can be reproduced. 
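 + * The minimal fix shape (sketch) is to release the lock on the + * early-return path as well: + * + * if devices.nrDeletedDevices == 0 { + * devices.Unlock() + * return nil + * } + *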
+ */ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby17176", Moby17176) +} + +type DeviceSet_moby17176 struct { + sync.Mutex + nrDeletedDevices int +} + +func (devices *DeviceSet_moby17176) cleanupDeletedDevices() error { + devices.Lock() + if devices.nrDeletedDevices == 0 { + /// Missing devices.Unlock() + return nil + } + devices.Unlock() + return errors.New("Error") +} + +func testDevmapperLockReleasedDeviceDeletion_moby17176() { + ds := &DeviceSet_moby17176{ + nrDeletedDevices: 0, + } + ds.cleanupDeletedDevices() + doneChan := make(chan bool) + go func() { + // deadlocks: x > 0 + ds.Lock() + defer ds.Unlock() + doneChan <- true + }() + + select { + case <-time.After(time.Millisecond): + case <-doneChan: + } +} +func Moby17176() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go testDevmapperLockReleasedDeviceDeletion_moby17176() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby21233.go b/src/runtime/testdata/testgoroutineleakgc/moby21233.go new file mode 100644 index 00000000000000..837bd6cb344328 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby21233.go @@ -0,0 +1,167 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/21233 + * Buggy version: cc12d2bfaae135e63b1f962ad80e6943dd995337 + * fix commit-id: 2f4aa9658408ac72a598363c6e22eadf93dbb8a7 + * Flaky: 100/100 + * Description: + * This test was checking that it received every progress update that was + * produced. But delivery of these intermediate progress updates is not + * guaranteed. A new update can overwrite the previous one if the previous + * one hasn't been sent to the channel yet. + * The call to t.Fatalf exited the current goroutine which was consuming + * the channel, which caused a deadlock and eventual test timeout rather + * than a proper failure message. 
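 + * A safer test pattern (sketch, not the upstream patch): the consumer + * goroutine should not call t.Fatalf itself; instead it can report the + * problem over a buffered error channel and keep draining progressChan, + * so the producers are never left blocked on a send. + *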
+ */ +package main + +import ( + "math/rand" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby21233", Moby21233) +} + +type Progress_moby21233 struct{} + +type Output_moby21233 interface { + WriteProgress(Progress_moby21233) error +} + +type chanOutput_moby21233 chan<- Progress_moby21233 + +type TransferManager_moby21233 struct { + mu sync.Mutex +} + +type Transfer_moby21233 struct { + mu sync.Mutex +} + +type Watcher_moby21233 struct { + signalChan chan struct{} + releaseChan chan struct{} + running chan struct{} +} + +func ChanOutput_moby21233(progressChan chan<- Progress_moby21233) Output_moby21233 { + return chanOutput_moby21233(progressChan) +} +func (out chanOutput_moby21233) WriteProgress(p Progress_moby21233) error { + out <- p + return nil +} +func NewTransferManager_moby21233() *TransferManager_moby21233 { + return &TransferManager_moby21233{} +} +func NewTransfer_moby21233() *Transfer_moby21233 { + return &Transfer_moby21233{} +} +func (t *Transfer_moby21233) Release(watcher *Watcher_moby21233) { + t.mu.Lock() + t.mu.Unlock() + close(watcher.releaseChan) + <-watcher.running +} +func (t *Transfer_moby21233) Watch(progressOutput Output_moby21233) *Watcher_moby21233 { + t.mu.Lock() + defer t.mu.Unlock() + lastProgress := Progress_moby21233{} + w := &Watcher_moby21233{ + releaseChan: make(chan struct{}), + signalChan: make(chan struct{}), + running: make(chan struct{}), + } + go func() { // G2 + // deadlocks: x > 0 + defer func() { + close(w.running) + }() + done := false + for { + t.mu.Lock() + t.mu.Unlock() + if rand.Int31n(2) >= 1 { + progressOutput.WriteProgress(lastProgress) + } + if done { + return + } + select { + case <-w.signalChan: + case <-w.releaseChan: + done = true + } + } + }() + return w +} +func (tm *TransferManager_moby21233) Transfer(progressOutput Output_moby21233) (*Transfer_moby21233, *Watcher_moby21233) { + tm.mu.Lock() + defer tm.mu.Unlock() + t := NewTransfer_moby21233() + return t, t.Watch(progressOutput) +} + +func testTransfer_moby21233() { // G1 + // deadlocks: x > 0 + tm := NewTransferManager_moby21233() + progressChan := make(chan Progress_moby21233) + progressDone := make(chan struct{}) + go func() { // G3 + time.Sleep(1 * time.Millisecond) + for p := range progressChan { /// Chan consumer + if rand.Int31n(2) >= 1 { + return + } + _ = p + } + close(progressDone) + }() + time.Sleep(1 * time.Millisecond) + ids := []string{"id1", "id2", "id3"} + xrefs := make([]*Transfer_moby21233, len(ids)) + watchers := make([]*Watcher_moby21233, len(ids)) + for i := range ids { + xrefs[i], watchers[i] = tm.Transfer(ChanOutput_moby21233(progressChan)) /// Chan producer + time.Sleep(2 * time.Millisecond) + } + + for i := range xrefs { + xrefs[i].Release(watchers[i]) + } + + close(progressChan) + <-progressDone +} + +func Moby21233() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go testTransfer_moby21233() // G1 + } +} + +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// testTransfer() +// tm.Transfer() +// t.Watch() +// . WriteProgress() +// . ProgressChan<- +// . . <-progressChan +// . . rand.Int31n(2) >= 1 +// . . return +// . ProgressChan<- . 
+// <-watcher.running +// ----------------------G1, G2 leak-------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go b/src/runtime/testdata/testgoroutineleakgc/moby25348.go new file mode 100644 index 00000000000000..4370779e9885c2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -0,0 +1,58 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/25384 + * Buggy version: 58befe3081726ef74ea09198cd9488fb42c51f51 + * fix commit-id: 42360d164b9f25fb4b150ef066fcf57fa39559a7 + * Flaky: 100/100 + * Description: + * When n=1 (len(pm.plugins)), the location of group.Wait() doesn’t matter. + * When n is larger than 1, group.Wait() is invoked in each iteration. Whenever + * group.Wait() is invoked, it waits for group.Done() to be executed n times. + * However, group.Done() is only executed once in one iteration. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby25348", Moby25348) +} + +type plugin_moby25348 struct{} + +type Manager_moby25348 struct { + plugins []*plugin_moby25348 +} + +func (pm *Manager_moby25348) init() { + var group sync.WaitGroup + group.Add(len(pm.plugins)) + for _, p := range pm.plugins { + go func(p *plugin_moby25348) { + defer group.Done() + }(p) + group.Wait() // Block here + } +} + +func Moby25348() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func() { + p1 := &plugin_moby25348{} + p2 := &plugin_moby25348{} + pm := &Manager_moby25348{ + plugins: []*plugin_moby25348{p1, p2}, + } + // deadlocks: 100 + go pm.init() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby27782.go b/src/runtime/testdata/testgoroutineleakgc/moby27782.go new file mode 100644 index 00000000000000..36c715c33ed8de --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby27782.go @@ -0,0 +1,266 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/27782 + * Buggy version: 18768fdc2e76ec6c600c8ab57d2d487ee7877794 + * fix commit-id: a69a59ffc7e3d028a72d1195c2c1535f447eaa84 + * Flaky: 2/100 + */ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby27782", Moby27782) +} + +type Event_moby27782 struct { + Op Op_moby27782 +} + +type Op_moby27782 uint32 + +const ( + Create_moby27782 Op_moby27782 = 1 << iota + Write_moby27782 + Remove_moby27782 + Rename_moby27782 + Chmod_moby27782 +) + +func newEvent(op Op_moby27782) Event_moby27782 { + return Event_moby27782{op} +} + +func (e *Event_moby27782) ignoreLinux(w *Watcher_moby27782) bool { + if e.Op != Write_moby27782 { + w.mu.Lock() + defer w.mu.Unlock() + w.cv.Broadcast() + return true + } + runtime.Gosched() + return false +} + +type Watcher_moby27782 struct { + Events chan Event_moby27782 + mu sync.Mutex // L1 + cv *sync.Cond // C1 + done chan struct{} +} + +func NewWatcher_moby27782() *Watcher_moby27782 { + w := &Watcher_moby27782{ + Events: make(chan Event_moby27782), + done: make(chan struct{}), + } + w.cv = sync.NewCond(&w.mu) + // deadlocks: x > 0 + go w.readEvents() // G3 + return w +} + +func (w *Watcher_moby27782) readEvents() { + defer close(w.Events) + for { + if w.isClosed() { + return + } + event := newEvent(Write_moby27782) // MODIFY event + if !event.ignoreLinux(w) { + runtime.Gosched() + select { + case w.Events <- event: + case <-w.done: + return + } + } + } +} + +func (w *Watcher_moby27782) isClosed() bool { + select { + case <-w.done: + return true + 
default: + return false + } +} + +func (w *Watcher_moby27782) Close() { + if w.isClosed() { + return + } + close(w.done) +} + +func (w *Watcher_moby27782) Remove() { + w.mu.Lock() + defer w.mu.Unlock() + exists := true + for exists { + w.cv.Wait() + runtime.Gosched() + } +} + +type FileWatcher_moby27782 interface { + Events() <-chan Event_moby27782 + Remove() + Close() +} + +func New_moby27782() FileWatcher_moby27782 { + return NewEventWatcher_moby27782() +} + +func NewEventWatcher_moby27782() FileWatcher_moby27782 { + return &fsNotifyWatcher_moby27782{NewWatcher_moby27782()} +} + +type fsNotifyWatcher_moby27782 struct { + *Watcher_moby27782 +} + +func (w *fsNotifyWatcher_moby27782) Events() <-chan Event_moby27782 { + return w.Watcher_moby27782.Events +} + +func watchFile_moby27782() FileWatcher_moby27782 { + fileWatcher := New_moby27782() + return fileWatcher +} + +type LogWatcher_moby27782 struct { + closeOnce sync.Once + closeNotifier chan struct{} +} + +func (w *LogWatcher_moby27782) Close() { + w.closeOnce.Do(func() { + close(w.closeNotifier) + }) +} + +func (w *LogWatcher_moby27782) WatchClose() <-chan struct{} { + return w.closeNotifier +} + +func NewLogWatcher_moby27782() *LogWatcher_moby27782 { + return &LogWatcher_moby27782{ + closeNotifier: make(chan struct{}), + } +} + +func followLogs_moby27782(logWatcher *LogWatcher_moby27782) { + fileWatcher := watchFile_moby27782() + defer func() { + fileWatcher.Close() + }() + waitRead := func() { + runtime.Gosched() + select { + case <-fileWatcher.Events(): + case <-logWatcher.WatchClose(): + fileWatcher.Remove() + return + } + } + handleDecodeErr := func() { + waitRead() + } + handleDecodeErr() +} + +type Container_moby27782 struct { + LogDriver *JSONFileLogger_moby27782 +} + +func (container *Container_moby27782) InitializeStdio() { + if err := container.startLogging(); err != nil { + container.Reset() + } +} + +func (container *Container_moby27782) startLogging() error { + l := &JSONFileLogger_moby27782{ + readers: make(map[*LogWatcher_moby27782]struct{}), + } + container.LogDriver = l + l.ReadLogs() + return errors.New("Some error") +} + +func (container *Container_moby27782) Reset() { + if container.LogDriver != nil { + container.LogDriver.Close() + } +} + +type JSONFileLogger_moby27782 struct { + readers map[*LogWatcher_moby27782]struct{} +} + +func (l *JSONFileLogger_moby27782) ReadLogs() *LogWatcher_moby27782 { + logWatcher := NewLogWatcher_moby27782() + // deadlocks: x > 0 + go l.readLogs(logWatcher) // G2 + return logWatcher +} + +func (l *JSONFileLogger_moby27782) readLogs(logWatcher *LogWatcher_moby27782) { + l.readers[logWatcher] = struct{}{} + followLogs_moby27782(logWatcher) +} + +func (l *JSONFileLogger_moby27782) Close() { + for r := range l.readers { + r.Close() + delete(l.readers, r) + } +} + +func Moby27782() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10000; i++ { + go (&Container_moby27782{}).InitializeStdio() // G1 + } +} + +// +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// InitializeStdio() +// startLogging() +// l.ReadLogs() +// NewLogWatcher() +// go l.readLogs() [G2] l.readLogs() +// container.Reset() . +// LogDriver.Close() . +// r.Close() . +// close(w.closeNotifier) . +// . followLogs(logWatcher) +// . watchFile() +// . New() +// . NewEventWatcher() +// . NewWatcher() +// . . w.readEvents() +// . . event.ignoreLinux() +// . . return false +// . <-logWatcher.WatchClose() . 
+// . fileWatcher.Remove() . +// . w.cv.Wait() . +// . . w.Events <- event +// --------------------------------G2,G3 leak------------------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby28462.go b/src/runtime/testdata/testgoroutineleakgc/moby28462.go new file mode 100644 index 00000000000000..af7bc8a4722db3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby28462.go @@ -0,0 +1,139 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/28462 + * Buggy version: b184bdabf7a01c4b802304ac64ac133743c484be + * fix commit-id: 89b123473774248fc3a0356dd3ce5b116cc69b29 + * Flaky: 69/100 + * Description: + * There are three goroutines mentioned in the bug report Moby#28405. + * Actually, only two goroutines are needed to trigger this bug. This bug + * is another example where lock and channel are mixed with each other. + * + * Moby#28405 : https://github.com/moby/moby/issues/28405 + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby28462", Moby28462) +} + +type State_moby28462 struct { + Health *Health_moby28462 +} + +type Container_moby28462 struct { + sync.Mutex + State *State_moby28462 +} + +func (ctr *Container_moby28462) start() { + go ctr.waitExit() +} +func (ctr *Container_moby28462) waitExit() { + +} + +type Store_moby28462 struct { + ctr *Container_moby28462 +} + +func (s *Store_moby28462) Get() *Container_moby28462 { + return s.ctr +} + +type Daemon_moby28462 struct { + containers Store_moby28462 +} + +func (d *Daemon_moby28462) StateChanged() { + c := d.containers.Get() + c.Lock() + d.updateHealthMonitorElseBranch(c) + defer c.Unlock() +} + +func (d *Daemon_moby28462) updateHealthMonitorIfBranch(c *Container_moby28462) { + h := c.State.Health + if stop := h.OpenMonitorChannel(); stop != nil { + go monitor_moby28462(c, stop) + } +} +func (d *Daemon_moby28462) updateHealthMonitorElseBranch(c *Container_moby28462) { + h := c.State.Health + h.CloseMonitorChannel() +} + +type Health_moby28462 struct { + stop chan struct{} +} + +func (s *Health_moby28462) OpenMonitorChannel() chan struct{} { + return s.stop +} + +func (s *Health_moby28462) CloseMonitorChannel() { + if s.stop != nil { + s.stop <- struct{}{} + } +} + +func monitor_moby28462(c *Container_moby28462, stop chan struct{}) { + for { + select { + case <-stop: + return + default: + handleProbeResult_moby28462(c) + } + } +} + +func handleProbeResult_moby28462(c *Container_moby28462) { + runtime.Gosched() + c.Lock() + defer c.Unlock() +} + +func NewDaemonAndContainer_moby28462() (*Daemon_moby28462, *Container_moby28462) { + c := &Container_moby28462{ + State: &State_moby28462{&Health_moby28462{make(chan struct{})}}, + } + d := &Daemon_moby28462{Store_moby28462{c}} + return d, c +} + +/// +/// G1 G2 +/// monitor() +/// handleProbeResult() +/// d.StateChanged() +/// c.Lock() +/// d.updateHealthMonitorElseBranch() +/// h.CloseMonitorChannel() +/// s.stop <- struct{}{} +/// c.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Moby28462() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 10000; i++ { + go func() { + d, c := NewDaemonAndContainer_moby28462() + // deadlocks: x > 0 + go monitor_moby28462(c, c.State.Health.OpenMonitorChannel()) // G1 + // deadlocks: x > 0 + go d.StateChanged() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go new file mode 100644 
index 00000000000000..65807d6ee7f1ac --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -0,0 +1,74 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby29733", Moby29733) +} + +type Plugin_moby29733 struct { + activated bool + activateWait *sync.Cond +} + +type plugins_moby29733 struct { + sync.Mutex + plugins map[int]*Plugin_moby29733 +} + +func (p *Plugin_moby29733) waitActive() { + p.activateWait.L.Lock() + for !p.activated { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() +} + +type extpointHandlers_moby29733 struct { + sync.RWMutex + extpointHandlers map[int]struct{} +} + +func Handle_moby29733(storage plugins_moby29733, handlers extpointHandlers_moby29733) { + handlers.Lock() + for _, p := range storage.plugins { + p.activated = false + } + handlers.Unlock() +} + +func testActive_moby29733(p *Plugin_moby29733) { + done := make(chan struct{}) + go func() { + // deadlocks: x > 0 + p.waitActive() + close(done) + }() + <-done +} + +func Moby29733() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1; i++ { + go func() { + // deadlocks: x > 0 + storage := plugins_moby29733{plugins: make(map[int]*Plugin_moby29733)} + handlers := extpointHandlers_moby29733{extpointHandlers: make(map[int]struct{})} + + p := &Plugin_moby29733{activateWait: sync.NewCond(&sync.Mutex{})} + storage.plugins[0] = p + + testActive_moby29733(p) + Handle_moby29733(storage, handlers) + testActive_moby29733(p) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go new file mode 100644 index 00000000000000..2ca03184260954 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -0,0 +1,62 @@ +package main + +import ( + "errors" + "runtime" + "sync" + "time" +) + +func init() { + register("Moby30408", Moby30408) +} + +type Manifest_moby30408 struct { + Implements []string +} + +type Plugin_moby30408 struct { + activateWait *sync.Cond + activateErr error + Manifest *Manifest_moby30408 +} + +func (p *Plugin_moby30408) waitActive() error { + p.activateWait.L.Lock() + for !p.activated() { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() + return p.activateErr +} + +func (p *Plugin_moby30408) activated() bool { + return p.Manifest != nil +} + +func testActive_moby30408(p *Plugin_moby30408) { + done := make(chan struct{}) + go func() { + // deadlocks: 100 + p.waitActive() + close(done) + }() + <-done +} + +func Moby30408() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: 100 + p := &Plugin_moby30408{activateWait: sync.NewCond(&sync.Mutex{})} + p.activateErr = errors.New("some junk happened") + + testActive_moby30408(p) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby33781.go b/src/runtime/testdata/testgoroutineleakgc/moby33781.go new file mode 100644 index 00000000000000..115313fa3c9acb --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby33781.go @@ -0,0 +1,84 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/33781 + * Buggy version: 33fd3817b0f5ca4b87f0a75c2bd583b4425d392b + * fix commit-id: 67297ba0051d39be544009ba76abea14bc0be8a4 + * Flaky: 25/100 + * Description: + * The goroutine created using anonymous function is blocked at + * sending message to a unbuffered channel. 
However there exists a + * path in the parent goroutine where the parent function will + * return without draining the channel. + */ + +package main + +import ( + "context" + "runtime" + "time" +) + +func init() { + register("Moby33781", Moby33781) +} + +func monitor_moby33781(stop chan bool) { + probeInterval := time.Millisecond + probeTimeout := time.Millisecond + for { + select { + case <-stop: + return + case <-time.After(probeInterval): + results := make(chan bool) + ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout) + go func() { // G3 + // deadlocks: x > 0 + results <- true + close(results) + }() + select { + case <-stop: + // results should be drained here + cancelProbe() + return + case <-results: + cancelProbe() + case <-ctx.Done(): + cancelProbe() + <-results + } + } + } +} + +/// +/// G1 G2 G3 +/// monitor() +/// <-time.After() +/// stop <- +/// <-stop +/// return +/// cancelProbe() +/// return +/// result<- +///----------------G3 leak------------------ +/// + +func Moby33781() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + for i := 0; i < 100; i++ { + go func(i int) { + stop := make(chan bool) + go monitor_moby33781(stop) // G1 + go func() { // G2 + time.Sleep(time.Duration(i) * time.Millisecond) + stop <- true + }() + }(i) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby36114.go b/src/runtime/testdata/testgoroutineleakgc/moby36114.go new file mode 100644 index 00000000000000..1f4c3ba8533dbd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby36114.go @@ -0,0 +1,52 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/36114 + * Buggy version: 6d4d3c52ae7c3f910bfc7552a2a673a8338e5b9f + * fix commit-id: a44fcd3d27c06aaa60d8d1cbce169f0d982e74b1 + * Flaky: 100/100 + * Description: + * This is a double lock bug. The lock for the + * struct svm has already been locked when calling + * svm.hotRemoveVHDsAtStart(). + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby36114", Moby36114) +} + +type serviceVM_moby36114 struct { + sync.Mutex +} + +func (svm *serviceVM_moby36114) hotAddVHDsAtStart() { + svm.Lock() + defer svm.Unlock() + svm.hotRemoveVHDsAtStart() +} + +func (svm *serviceVM_moby36114) hotRemoveVHDsAtStart() { + svm.Lock() + defer svm.Unlock() +} + +func Moby36114() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + s := &serviceVM_moby36114{} + // deadlocks: x > 0 + go s.hotAddVHDsAtStart() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4395.go b/src/runtime/testdata/testgoroutineleakgc/moby4395.go new file mode 100644 index 00000000000000..f77cc883025608 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby4395.go @@ -0,0 +1,49 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/4395 + * Buggy version: 6d6ec5e0051ad081be3d71e20b39a25c711b4bc3 + * fix commit-id: d3a6ee1e55a53ee54b91ffb6c53ba674768cf9de + * Flaky: 100/100 + * Description: + * The anonymous goroutine could be blocked sending to + * the channel, which might never be drained. 
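 + * A common mitigation (sketch; not necessarily the upstream patch) is to + * buffer the channel so the goroutine can finish even if no one receives: + * + * ch := make(chan error, 1) + *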
+ */ + +package main + +import ( + "errors" + "runtime" + "time" +) + +func init() { + register("Moby4395", Moby4395) +} + +func Go_moby4395(f func() error) chan error { + ch := make(chan error) + go func() { + // deadlocks: 1 + ch <- f() // G2 + }() + return ch +} + +/// +/// G1 G2 +/// Go() +/// return ch +/// ch <- f() +/// ----------G2 leak------------- +/// + +func Moby4395() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + Go_moby4395(func() error { // G1 + return errors.New("") + }) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4951.go b/src/runtime/testdata/testgoroutineleakgc/moby4951.go new file mode 100644 index 00000000000000..91d18fd73e8ecd --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby4951.go @@ -0,0 +1,102 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/4951 + * Buggy version: 81f148be566ab2b17810ad4be61a5d8beac8330f + * fix commit-id: 2ffef1b7eb618162673c6ffabccb9ca57c7dfce3 + * Flaky: 100/100 + * Description: + * The root cause and patch is clearly explained in the commit + * description. The global lock is devices.Lock(), and the device + * lock is baseInfo.lock.Lock(). It is very likely that this bug + * can be reproduced. + */ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Moby4951", Moby4951) +} + +type DeviceSet_moby4951 struct { + sync.Mutex + infos map[string]*DevInfo_moby4951 + nrDeletedDevices int +} + +func (devices *DeviceSet_moby4951) DeleteDevice(hash string) { + devices.Lock() + defer devices.Unlock() + + info := devices.lookupDevice(hash) + + info.lock.Lock() + defer info.lock.Unlock() + + devices.deleteDevice(info) +} + +func (devices *DeviceSet_moby4951) lookupDevice(hash string) *DevInfo_moby4951 { + existing, ok := devices.infos[hash] + if !ok { + return nil + } + return existing +} + +func (devices *DeviceSet_moby4951) deleteDevice(info *DevInfo_moby4951) { + devices.removeDeviceAndWait(info.Name()) +} + +func (devices *DeviceSet_moby4951) removeDeviceAndWait(devname string) { + /// remove devices by devname + devices.Unlock() + time.Sleep(300 * time.Nanosecond) + devices.Lock() +} + +type DevInfo_moby4951 struct { + lock sync.Mutex + name string +} + +func (info *DevInfo_moby4951) Name() string { + return info.name +} + +func NewDeviceSet_moby4951() *DeviceSet_moby4951 { + devices := &DeviceSet_moby4951{ + infos: make(map[string]*DevInfo_moby4951), + } + info1 := &DevInfo_moby4951{ + name: "info1", + } + info2 := &DevInfo_moby4951{ + name: "info2", + } + devices.infos[info1.name] = info1 + devices.infos[info2.name] = info2 + return devices +} + +func Moby4951() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + ds := NewDeviceSet_moby4951() + /// Delete devices by the same info + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby7559.go b/src/runtime/testdata/testgoroutineleakgc/moby7559.go new file mode 100644 index 00000000000000..d9c94b2c1f4d06 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby7559.go @@ -0,0 +1,52 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/7559 + * Buggy version: 64579f51fcb439c36377c0068ccc9a007b368b5a + * fix commit-id: 6cbb8e070d6c3a66bf48fbe5cbf689557eee23db + * Flaky: 100/100 + */ +package main + +import ( + "net" + "runtime" + "sync" + "time" +) + +func 
init() { + register("Moby7559", Moby7559) +} + +type UDPProxy_moby7559 struct { + connTrackLock sync.Mutex +} + +func (proxy *UDPProxy_moby7559) Run() { + for i := 0; i < 2; i++ { + proxy.connTrackLock.Lock() + _, err := net.DialUDP("udp", nil, nil) + if err != nil { + /// Missing unlock here + continue + } + if i == 0 { + break + } + } + proxy.connTrackLock.Unlock() +} +func Moby7559() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + proxy := &UDPProxy_moby7559{} + // deadlocks: x > 0 + go proxy.Run() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/patterns.go b/src/runtime/testdata/testgoroutineleakgc/patterns.go new file mode 100644 index 00000000000000..0849216683349f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/patterns.go @@ -0,0 +1,262 @@ +package main + +import ( + "context" + "fmt" + "runtime" + "time" +) + +func init() { + register("NoCloseRange", NoCloseRange) + register("MethodContractViolation", MethodContractViolation) + register("DoubleSend", DoubleSend) + register("EarlyReturn", EarlyReturn) + register("NCastLeak", NCastLeak) + register("Timeout", Timeout) +} + +// Incoming list of items and the number of workers. +func noCloseRange(list []any, workers int) { + ch := make(chan any) + + // Create each worker + for i := 0; i < workers; i++ { + go func() { + // deadlocks: 10 + + // Each worker waits for an item and processes it. + for item := range ch { + // Process each item + _ = item + } + }() + } + + // Send each item to one of the workers. + for _, item := range list { + // Sending can deadlock if workers == 0 or if one of the workers panics + ch <- item + } + // The channel is never closed, so workers deadlock once there are no more + // items left to process. +} + +func NoCloseRange() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + go noCloseRange([]any{1, 2, 3}, 0) + go noCloseRange([]any{1, 2, 3}, 3) +} + +// A worker processes items pushed to `ch` one by one in the background. +// When the worker is no longer needed, it must be closed with `Stop`. +// +// Specifications: +// +// A worker may be started any number of times, but must be stopped only once. +// Stopping a worker multiple times will lead to a close panic. +// Any worker that is started must eventually be stopped. +// Failing to stop a worker results in a goroutine leak +type worker struct { + ch chan any + done chan any +} + +// Start spawns a background goroutine that extracts items pushed to the queue. +func (w worker) Start() { + go func() { + // deadlocks: 1 + + for { + select { + case <-w.ch: // Normal workflow + case <-w.done: + return // Shut down + } + } + }() +} + +func (w worker) Stop() { + // Allows goroutine created by Start to terminate + close(w.done) +} + +func (w worker) AddToQueue(item any) { + w.ch <- item +} + +// worker limited in scope by workerLifecycle +func workerLifecycle(items []any) { + // Create a new worker + w := worker{ + ch: make(chan any), + done: make(chan any), + } + // Start worker + w.Start() + + // Operate on worker + for _, item := range items { + w.AddToQueue(item) + } + + runtime.Gosched() + // Exits without calling ’Stop’. Goroutine created by `Start` eventually deadlocks. 
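+ // A correct caller would pair Start with Stop, e.g. `defer w.Stop()` right + // after w.Start(); it is deliberately omitted here to produce the leak.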
+} + +func MethodContractViolation() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + workerLifecycle(make([]any, 10)) + runtime.Gosched() +} + +// doubleSend incoming channel must send a message (incoming error simulates an error generated internally). +func doubleSend(ch chan any, err error) { + if err != nil { + // In case of an error, send nil. + ch <- nil + // Return is missing here. + } + // Otherwise, continue with normal behaviour + // This send is still executed in the error case, which may lead to deadlock. + ch <- struct{}{} +} + +func DoubleSend() { + ch := make(chan any) + defer func() { + time.Sleep(1000 * time.Millisecond) + runtime.GC() + }() + + go func() { + // deadlocks: 0 + doubleSend(ch, nil) + }() + <-ch + + go func() { + // deadlocks: 1 + doubleSend(ch, fmt.Errorf("error")) + }() + <-ch + + ch1 := make(chan any, 1) + go func() { + // deadlocks: 0 + doubleSend(ch1, fmt.Errorf("error")) + }() + <-ch1 +} + +// earlyReturn demonstrates a common pattern of goroutine leaks. +// A return statement interrupts the evaluation of the parent goroutine before it can consume a message. +// Incoming error simulates an error produced internally. +func earlyReturn(err error) { + // Create a synchronous channel + ch := make(chan any) + + go func() { + // deadlocks: 1 + + // Send something to the channel. + // Deadlocks if the parent goroutine terminates early. + ch <- struct{}{} + }() + + if err != nil { + // Interrupt evaluation of parent early in case of error. + // Sender deadlocks. + return + } + + // Only receive if there is no error. + <-ch +} + +func EarlyReturn() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + go earlyReturn(nil) + go earlyReturn(fmt.Errorf("error")) +} + +// nCastLeak processes a number of items. First result to pass the post is retrieved from the channel queue. +func nCastLeak(items []any) { + // Channel is synchronous. + ch := make(chan any) + + // Iterate over every item + for range items { + go func() { + // deadlocks: 99 + + // Process item and send result to channel + ch <- struct{}{} + // Channel is synchronous: only one sender will synchronise + }() + } + // Retrieve first result. All other senders block. + // Receiver blocks if there are no senders. + <-ch +} + +func NCastLeak() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + go func() { + // deadlocks: 1 + nCastLeak(nil) + }() + + go func() { + nCastLeak(make([]any, 100)) + }() +} + +// A context is provided to short-circuit evaluation. 
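+// If the context is cancelled before the receive runs, the select below takes
+// the ctx.Done() branch and the sender goroutine is left blocked forever on
+// the unbuffered channel; giving ch a buffer of capacity 1 would be the usual
+// way to let the sender finish and be reclaimed.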
+func timeout(ctx context.Context) { + ch := make(chan any) + + go func() { + // deadlocks: x > 0 + ch <- struct{}{} + }() + + runtime.Gosched() + select { + case <-ch: // Receive message + // Sender is released + case <-ctx.Done(): // Context was cancelled or timed out + // Sender is stuck + } +} + +func Timeout() { + defer func() { + time.Sleep(10 * time.Millisecond) + runtime.GC() + }() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + for i := 0; i < 100; i++ { + go timeout(ctx) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/serving2137.go b/src/runtime/testdata/testgoroutineleakgc/serving2137.go new file mode 100644 index 00000000000000..b655585926b268 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/serving2137.go @@ -0,0 +1,143 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Serving2137", Serving2137) +} + +type token_serving2137 struct{} + +type request_serving2137 struct { + lock *sync.Mutex + accepted chan bool +} + +type Breaker_serving2137 struct { + pendingRequests chan token_serving2137 + activeRequests chan token_serving2137 +} + +func (b *Breaker_serving2137) Maybe(thunk func()) bool { + var t token_serving2137 + select { + default: + // Pending request queue is full. Report failure. + return false + case b.pendingRequests <- t: + // Pending request has capacity. + // Wait for capacity in the active queue. + b.activeRequests <- t + // Defer releasing capacity in the active and pending request queue. + defer func() { + <-b.activeRequests + runtime.Gosched() + <-b.pendingRequests + }() + // Do the thing. + thunk() + // Report success + return true + } +} + +func (b *Breaker_serving2137) concurrentRequest() request_serving2137 { + r := request_serving2137{lock: &sync.Mutex{}, accepted: make(chan bool, 1)} + r.lock.Lock() + var start sync.WaitGroup + start.Add(1) + go func() { // G2, G3 + // deadlocks: x > 0 + start.Done() + runtime.Gosched() + ok := b.Maybe(func() { + // Will block on locked mutex. + r.lock.Lock() + runtime.Gosched() + r.lock.Unlock() + }) + r.accepted <- ok + }() + start.Wait() // Ensure that the go func has had a chance to execute. + return r +} + +// Perform n requests against the breaker, returning mutexes for each +// request which succeeded, and a slice of bools for all requests. 
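+// (In this reproduction the return value is a []request_serving2137; each
+// element carries the locked mutex and the buffered accepted channel for one
+// request, rather than separate mutex and bool slices.)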
+func (b *Breaker_serving2137) concurrentRequests(n int) []request_serving2137 { + requests := make([]request_serving2137, n) + for i := range requests { + requests[i] = b.concurrentRequest() + } + return requests +} + +func NewBreaker_serving2137(queueDepth, maxConcurrency int32) *Breaker_serving2137 { + return &Breaker_serving2137{ + pendingRequests: make(chan token_serving2137, queueDepth+maxConcurrency), + activeRequests: make(chan token_serving2137, maxConcurrency), + } +} + +func unlock_serving2137(req request_serving2137) { + req.lock.Unlock() + runtime.Gosched() + // Verify that function has completed + ok := <-req.accepted + runtime.Gosched() + // Requeue for next usage + req.accepted <- ok +} + +func unlockAll_serving2137(requests []request_serving2137) { + for _, lc := range requests { + unlock_serving2137(lc) + } +} + +func Serving2137() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + b := NewBreaker_serving2137(1, 1) + + locks := b.concurrentRequests(2) // G1 + unlockAll_serving2137(locks) + }() + } +} + +// +// Example deadlock trace: +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// b.concurrentRequests(2) +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// unlockAll(locks) +// unlock(lc) +// req.lock.Unlock() +// ok := <-req.accepted +// b.Maybe() +// b.activeRequests <- t +// thunk() +// r.lock.Lock() +// b.Maybe() +// b.activeRequests <- t +// ----------------------------G1,G2,G3 deadlock----------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go new file mode 100644 index 00000000000000..16eb2800da3b6f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -0,0 +1,85 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Syncthing4829", Syncthing4829) +} + +type Address_syncthing4829 int + +type Mapping_syncthing4829 struct { + mut sync.RWMutex + + extAddresses map[string]Address_syncthing4829 +} + +func (m *Mapping_syncthing4829) clearAddresses() { + m.mut.Lock() // First locking + var removed []Address_syncthing4829 + for id, addr := range m.extAddresses { + removed = append(removed, addr) + delete(m.extAddresses, id) + } + if len(removed) > 0 { + m.notify(nil, removed) + } + m.mut.Unlock() +} + +func (m *Mapping_syncthing4829) notify(added, remove []Address_syncthing4829) { + m.mut.RLock() + m.mut.RUnlock() +} + +type Service_syncthing4829 struct { + mut sync.RWMutex + + mappings []*Mapping_syncthing4829 +} + +func (s *Service_syncthing4829) NewMapping() *Mapping_syncthing4829 { + mapping := &Mapping_syncthing4829{ + extAddresses: make(map[string]Address_syncthing4829), + } + s.mut.Lock() + s.mappings = append(s.mappings, mapping) + s.mut.Unlock() + return mapping +} + +func (s *Service_syncthing4829) RemoveMapping(mapping *Mapping_syncthing4829) { + s.mut.Lock() + defer s.mut.Unlock() + for _, existing := range s.mappings { + if existing == mapping { + mapping.clearAddresses() + } + } +} + +func NewService_syncthing4829() *Service_syncthing4829 { + return &Service_syncthing4829{} +} + +func Syncthing4829() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + 
natSvc := NewService_syncthing4829() + m := natSvc.NewMapping() + m.extAddresses["test"] = 0 + + natSvc.RemoveMapping(m) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go new file mode 100644 index 00000000000000..5ff25ca268a09b --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -0,0 +1,123 @@ +package main + +import ( + "runtime" + "sync" + "time" +) + +func init() { + register("Syncthing5795", Syncthing5795) +} + +type message_syncthing5795 interface{} + +type ClusterConfig_syncthing5795 struct{} + +type Model_syncthing5795 interface { + ClusterConfig(message_syncthing5795) +} + +type TestModel_syncthing5795 struct { + ccFn func() +} + +func (t *TestModel_syncthing5795) ClusterConfig(msg message_syncthing5795) { + if t.ccFn != nil { + t.ccFn() + } +} + +func newTestModel_syncthing5795() *TestModel_syncthing5795 { + return &TestModel_syncthing5795{} +} + +type Connection_syncthing5795 interface { + Start() + Close() +} + +type rawConnection_syncthing5795 struct { + receiver Model_syncthing5795 + + inbox chan message_syncthing5795 + dispatcherLoopStopped chan struct{} + closed chan struct{} + closeOnce sync.Once +} + +func (c *rawConnection_syncthing5795) Start() { + go c.readerLoop() + go func() { + // deadlocks: 1 + c.dispatcherLoop() + }() +} + +func (c *rawConnection_syncthing5795) readerLoop() { + for { + select { + case <-c.closed: + return + default: + } + } +} + +func (c *rawConnection_syncthing5795) dispatcherLoop() { + defer close(c.dispatcherLoopStopped) + var msg message_syncthing5795 + for { + select { + case msg = <-c.inbox: + case <-c.closed: + return + } + switch msg := msg.(type) { + case *ClusterConfig_syncthing5795: + c.receiver.ClusterConfig(msg) + default: + return + } + } +} + +func (c *rawConnection_syncthing5795) internalClose() { + c.closeOnce.Do(func() { + close(c.closed) + <-c.dispatcherLoopStopped + }) +} + +func (c *rawConnection_syncthing5795) Close() { + c.internalClose() +} + +func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncthing5795 { + return &rawConnection_syncthing5795{ + dispatcherLoopStopped: make(chan struct{}), + closed: make(chan struct{}), + inbox: make(chan message_syncthing5795), + receiver: receiver, + } +} + +func Syncthing5795() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + go func() { + // deadlocks: 1 + m := newTestModel_syncthing5795() + c := NewConnection_syncthing5795(m).(*rawConnection_syncthing5795) + m.ccFn = func() { + c.Close() + } + + c.Start() + c.inbox <- &ClusterConfig_syncthing5795{} + + <-c.dispatcherLoopStopped + }() +} diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e5593c8..e8fef35da7d104 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1206,6 +1206,7 @@ var gStatusStrings = [...]string{ _Gwaiting: "waiting", _Gdead: "dead", _Gcopystack: "copystack", + _Gleaked: "leaked", _Gpreempted: "preempted", } diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 03ec81fc0262a1..8b5eafd170f488 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted: + case _Gwaiting, _Gpreempted, _Gleaked: // There are a number of cases where a G might end up in // _Gwaiting but it's 
actually running in a non-preemptive // state but needs to present itself as preempted to the