From d3c27cc17a4d638ff4171b8b1a2140d206dc90f8 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer
Date: Fri, 15 Aug 2025 17:00:21 +0100
Subject: [PATCH 1/3] link: do not attempt to clean up perf event and tracefs
 event in order

Switching to runtime.AddCleanup means that cleanups may be batched and
may run concurrently. Since both the perf event and the tracefs event
will have a cleanup attached, it's not possible to guarantee an
ordering. Drop the code which tries to clean up in order when the
object is leaked without calling Close().

Signed-off-by: Lorenz Bauer
---
 link/perf_event.go | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/link/perf_event.go b/link/perf_event.go
index a072a6c83..22c78ed92 100644
--- a/link/perf_event.go
+++ b/link/perf_event.go
@@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"runtime"
 	"unsafe"
 
 	"github.com/cilium/ebpf"
@@ -61,15 +60,11 @@ type perfEvent struct {
 
 func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent {
 	pe := &perfEvent{event, fd}
-	// Both event and fd have their own finalizer, but we want to
-	// guarantee that they are closed in a certain order.
-	runtime.SetFinalizer(pe, (*perfEvent).Close)
 	return pe
 }
 
 func (pe *perfEvent) Close() error {
-	runtime.SetFinalizer(pe, nil)
-
+	// We close the perf event before attempting to remove the tracefs event.
 	if err := pe.fd.Close(); err != nil {
 		return fmt.Errorf("closing perf event fd: %w", err)
 	}

From 456b77f6f52ff42cecdd3b67333622db55a63910 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer
Date: Fri, 15 Aug 2025 17:05:11 +0100
Subject: [PATCH 2/3] perf: drop finalizer on Reader

It's not possible to call Reader.Close from a Cleanup, which indicates
that it's probably not the right thing to do here. Drop the finalizer.

Signed-off-by: Lorenz Bauer
---
 perf/reader.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/perf/reader.go b/perf/reader.go
index ed5278d7a..62c5ca90f 100644
--- a/perf/reader.go
+++ b/perf/reader.go
@@ -8,7 +8,6 @@ import (
 	"fmt"
 	"io"
 	"os"
-	"runtime"
 	"sync"
 	"time"
 
@@ -261,7 +260,6 @@ func NewReaderWithOptions(array *ebpf.Map, perCPUBuffer int, opts ReaderOptions)
 	if err = pr.Resume(); err != nil {
 		return nil, err
 	}
-	runtime.SetFinalizer(pr, (*Reader).Close)
 
 	return pr, nil
 }

From 9bb5cfe56f46ebcd3d0db392b6f6c2ef56ecb368 Mon Sep 17 00:00:00 2001
From: Timo Beckers
Date: Fri, 15 Aug 2025 17:20:05 +0100
Subject: [PATCH 3/3] all: use runtime.AddCleanup

Convert all uses of runtime.SetFinalizer to runtime.AddCleanup. The
exception is the unsafe memory code, which cannot currently be
converted due to fundamental differences with finalizers.

Cleanups cannot carry references to the object in question, and cannot
resurrect the object, meaning the original object's allocation can be
used to service new allocations before the cleanup has run. In our
case, this means bpf map memory gets zeroed by the allocator, which is
obviously a show stopper.

If we want to move off finalizers, we need to implement
https://github.com/golang/go/issues/70224 first and switch to that
instead.
Signed-off-by: Timo Beckers
Suggested-by: Lorenz Bauer
---
 btf/kernel.go              |  6 +++---
 internal/epoll/poller.go   |  8 ++++++--
 internal/sys/fd.go         | 21 ++++++---------------
 internal/sys/fd_other.go   |  4 +++-
 internal/sys/fd_windows.go |  4 +++-
 internal/testutils/chan.go |  2 ++
 internal/tracefs/kprobe.go | 11 ++++++++---
 memory.go                  | 23 ++++++++++++++---------
 memory_test.go             |  4 ----
 memory_unsafe.go           |  1 +
 perf/ring.go               |  7 +++++--
 ringbuf/ring_other.go      |  8 ++++++--
 ringbuf/ring_windows.go    |  8 ++++++--
 variable_test.go           | 25 +++++++++++++++----------
 14 files changed, 78 insertions(+), 54 deletions(-)

diff --git a/btf/kernel.go b/btf/kernel.go
index 1ea6ca218..3c41821d8 100644
--- a/btf/kernel.go
+++ b/btf/kernel.go
@@ -151,9 +151,9 @@ func loadKernelSpec() (*Spec, error) {
 		return nil, fmt.Errorf("load vmlinux: %w", err)
 	}
 
-	runtime.SetFinalizer(spec.decoder.sharedBuf, func(_ *sharedBuf) {
-		_ = unix.Munmap(raw)
-	})
+	runtime.AddCleanup(spec.decoder.sharedBuf, func(b []byte) {
+		_ = unix.Munmap(b)
+	}, raw)
 
 	return spec, nil
 }
diff --git a/internal/epoll/poller.go b/internal/epoll/poller.go
index fb9b1ba78..8781a6c93 100644
--- a/internal/epoll/poller.go
+++ b/internal/epoll/poller.go
@@ -34,6 +34,7 @@ type Poller struct {
 	eventMu    sync.Mutex
 	closeEvent *eventFd
 	flushEvent *eventFd
+	cleanup    runtime.Cleanup
 }
 
 func New() (_ *Poller, err error) {
@@ -75,7 +76,10 @@ func New() (_ *Poller, err error) {
 		return nil, fmt.Errorf("add flush eventfd: %w", err)
 	}
 
-	runtime.SetFinalizer(p, (*Poller).Close)
+	p.cleanup = runtime.AddCleanup(p, func(raw int) {
+		_ = unix.Close(raw)
+	}, p.epollFd)
+
 	return p, nil
 }
 
@@ -84,7 +88,7 @@ func New() (_ *Poller, err error) {
 // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
 // calls will return os.ErrClosed.
 func (p *Poller) Close() error {
-	runtime.SetFinalizer(p, nil)
+	p.cleanup.Stop()
 
 	// Interrupt Wait() via the closeEvent fd if it's currently blocked.
 	if err := p.wakeWaitForClose(); err != nil {
diff --git a/internal/sys/fd.go b/internal/sys/fd.go
index 7e769fb54..f12d11c20 100644
--- a/internal/sys/fd.go
+++ b/internal/sys/fd.go
@@ -21,23 +21,14 @@ const invalidFd = -1
 func newFD(value int) *FD {
 	testmain.TraceFD(value, 1)
 
-	fd := &FD{value}
-	runtime.SetFinalizer(fd, (*FD).finalize)
+	fd := &FD{raw: value}
+	fd.cleanup = runtime.AddCleanup(fd, func(raw int) {
+		testmain.LeakFD(raw)
+		_ = unix.Close(raw)
+	}, fd.raw)
 	return fd
 }
 
-// finalize is set as the FD's runtime finalizer and
-// sends a leak trace before calling FD.Close().
-func (fd *FD) finalize() {
-	if fd.raw == invalidFd {
-		return
-	}
-
-	testmain.LeakFD(fd.raw)
-
-	_ = fd.Close()
-}
-
 func (fd *FD) String() string {
 	return strconv.FormatInt(int64(fd.raw), 10)
 }
@@ -63,6 +54,6 @@ func (fd *FD) Disown() int {
 	testmain.ForgetFD(value)
 
 	fd.raw = invalidFd
-	runtime.SetFinalizer(fd, nil)
+	fd.cleanup.Stop()
 	return value
 }
diff --git a/internal/sys/fd_other.go b/internal/sys/fd_other.go
index 47057395e..2a6423a59 100644
--- a/internal/sys/fd_other.go
+++ b/internal/sys/fd_other.go
@@ -5,12 +5,14 @@ package sys
 import (
 	"fmt"
 	"os"
+	"runtime"
 
 	"github.com/cilium/ebpf/internal/unix"
 )
 
 type FD struct {
-	raw int
+	raw     int
+	cleanup runtime.Cleanup
 }
 
 // NewFD wraps a raw fd with a finalizer.
diff --git a/internal/sys/fd_windows.go b/internal/sys/fd_windows.go
index f39276382..1291c763f 100644
--- a/internal/sys/fd_windows.go
+++ b/internal/sys/fd_windows.go
@@ -3,6 +3,7 @@ package sys
 import (
 	"fmt"
 	"os"
+	"runtime"
 
 	"github.com/cilium/ebpf/internal"
 	"github.com/cilium/ebpf/internal/efw"
@@ -12,7 +13,8 @@
 //
 // It is not equivalent to a real file descriptor or handle.
 type FD struct {
-	raw int
+	raw     int
+	cleanup runtime.Cleanup
 }
 
 // NewFD wraps a raw fd with a finalizer.
diff --git a/internal/testutils/chan.go b/internal/testutils/chan.go
index ac95f2dcb..9761a72cd 100644
--- a/internal/testutils/chan.go
+++ b/internal/testutils/chan.go
@@ -8,6 +8,8 @@ import (
 // WaitChan waits for a value to be sent on a channel, or for a timeout to
 // occur. If the timeout is reached, the test will fail.
 func WaitChan[T any](tb testing.TB, ch <-chan T, timeout time.Duration) {
+	tb.Helper()
+
 	select {
 	case <-ch:
 		return
diff --git a/internal/tracefs/kprobe.go b/internal/tracefs/kprobe.go
index 4a9353a5a..68a1804d5 100644
--- a/internal/tracefs/kprobe.go
+++ b/internal/tracefs/kprobe.go
@@ -200,6 +200,8 @@ type Event struct {
 	group, name string
 	// event id allocated by the kernel. 0 if the event has already been removed.
 	id uint64
+
+	cleanup runtime.Cleanup
 }
 
 // NewEvent creates a new ephemeral trace event.
@@ -316,8 +318,11 @@ func NewEvent(args ProbeArgs) (*Event, error) {
 		return nil, fmt.Errorf("get trace event id: %w", err)
 	}
 
-	evt := &Event{args.Type, args.Group, eventName, tid}
-	runtime.SetFinalizer(evt, (*Event).Close)
+	evt := &Event{typ: args.Type, group: args.Group, name: eventName, id: tid}
+	evt.cleanup = runtime.AddCleanup(evt, func(*byte) {
+		_ = removeEvent(args.Type, fmt.Sprintf("%s/%s", args.Group, eventName))
+	}, nil)
+
 	return evt, nil
 }
 
@@ -330,7 +335,7 @@ func (evt *Event) Close() error {
 	}
 
 	evt.id = 0
-	runtime.SetFinalizer(evt, nil)
+	evt.cleanup.Stop()
 	pe := fmt.Sprintf("%s/%s", evt.group, evt.name)
 	return removeEvent(evt.typ, pe)
 }
diff --git a/memory.go b/memory.go
index 3475fb07b..bb7325a46 100644
--- a/memory.go
+++ b/memory.go
@@ -38,6 +38,8 @@ type Memory struct {
 	b    []byte
 	ro   bool
 	heap bool
+
+	cleanup runtime.Cleanup
 }
 
 func newMemory(fd, size int) (*Memory, error) {
@@ -62,20 +64,23 @@ func newMemory(fd, size int) (*Memory, error) {
 		return nil, fmt.Errorf("setting up memory-mapped region: %w", err)
 	}
 
-	mm := &Memory{
-		b,
-		ro,
-		false,
-	}
-	runtime.SetFinalizer(mm, (*Memory).close)
+	mm := &Memory{b: b, ro: ro, heap: false}
+	mm.cleanup = runtime.AddCleanup(mm, memoryCleanupFunc(), b)
 
 	return mm, nil
 }
 
-func (mm *Memory) close() {
-	if err := unix.Munmap(mm.b); err != nil {
-		panic(fmt.Errorf("unmapping memory: %w", err))
+func memoryCleanupFunc() func([]byte) {
+	return func(b []byte) {
+		if err := unix.Munmap(b); err != nil {
+			panic(fmt.Errorf("unmapping memory: %w", err))
+		}
 	}
+}
+
+func (mm *Memory) close() {
+	mm.cleanup.Stop()
+	memoryCleanupFunc()(mm.b)
 	mm.b = nil
 }
 
diff --git a/memory_test.go b/memory_test.go
index 195ee6702..83e180e41 100644
--- a/memory_test.go
+++ b/memory_test.go
@@ -4,7 +4,6 @@ import (
 	"io"
 	"math"
 	"os"
-	"runtime"
 	"testing"
 
 	"github.com/go-quicktest/qt"
@@ -88,9 +87,6 @@ func TestMemoryClose(t *testing.T) {
 	mm, err := mustMmapableArray(t, 0).Memory()
 	qt.Assert(t, qt.IsNil(err))
 
-	// Avoid unmap running twice.
-	runtime.SetFinalizer(mm, nil)
-
 	// unmap panics if the operation fails.
 	mm.close()
 }
diff --git a/memory_unsafe.go b/memory_unsafe.go
index cc254397f..9518ff35d 100644
--- a/memory_unsafe.go
+++ b/memory_unsafe.go
@@ -87,6 +87,7 @@ func newUnsafeMemory(fd, size int) (*Memory, error) {
 		unsafe.Slice((*byte)(alloc), size),
 		ro,
 		true,
+		runtime.Cleanup{},
 	}
 
 	return mm, nil
diff --git a/perf/ring.go b/perf/ring.go
index a00fb70d6..5b836df62 100644
--- a/perf/ring.go
+++ b/perf/ring.go
@@ -22,6 +22,7 @@ type perfEventRing struct {
 	cpu  int
 	mmap []byte
 	ringReader
+	cleanup runtime.Cleanup
 }
 
 func newPerfEventRing(cpu, perCPUBuffer int, opts ReaderOptions) (_ *sys.FD, _ *perfEventRing, err error) {
@@ -73,7 +74,9 @@ func newPerfEventRing(cpu, perCPUBuffer int, opts ReaderOptions) (_ *sys.FD, _ *perfEventRing, err error) {
 		mmap:       mmap,
 		ringReader: reader,
 	}
-	runtime.SetFinalizer(ring, (*perfEventRing).Close)
+	ring.cleanup = runtime.AddCleanup(ring, func(mmap []byte) {
+		_ = unix.Munmap(mmap)
+	}, ring.mmap)
 
 	return fd, ring, nil
 }
@@ -95,7 +98,7 @@ func perfBufferSize(perCPUBuffer int) int {
 }
 
 func (ring *perfEventRing) Close() error {
-	runtime.SetFinalizer(ring, nil)
+	ring.cleanup.Stop()
 	mmap := ring.mmap
 	ring.mmap = nil
 	return unix.Munmap(mmap)
 }
diff --git a/ringbuf/ring_other.go b/ringbuf/ring_other.go
index 69f6b75ba..c793499d9 100644
--- a/ringbuf/ring_other.go
+++ b/ringbuf/ring_other.go
@@ -16,6 +16,7 @@ type ringbufEventRing struct {
 	prod []byte
 	cons []byte
 	*ringReader
+	cleanup runtime.Cleanup
 }
 
 func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) {
@@ -38,13 +39,16 @@ func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) {
 		cons:       cons,
 		ringReader: newRingReader(cons_pos, prod_pos, prod[os.Getpagesize():]),
 	}
-	runtime.SetFinalizer(ring, (*ringbufEventRing).Close)
+	ring.cleanup = runtime.AddCleanup(ring, func(*byte) {
+		_ = unix.Munmap(prod)
+		_ = unix.Munmap(cons)
+	}, nil)
 
 	return ring, nil
 }
 
 func (ring *ringbufEventRing) Close() error {
-	runtime.SetFinalizer(ring, nil)
+	ring.cleanup.Stop()
 
 	prod, cons := ring.prod, ring.cons
 	ring.prod, ring.cons = nil, nil
diff --git a/ringbuf/ring_windows.go b/ringbuf/ring_windows.go
index 6c695aec2..96c114d6a 100644
--- a/ringbuf/ring_windows.go
+++ b/ringbuf/ring_windows.go
@@ -14,6 +14,8 @@
 type ringbufEventRing struct {
 	mapFd            *sys.FD
 	cons, prod, data *uint8
 	*ringReader
+
+	cleanup runtime.Cleanup
 }
 
 func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) {
@@ -51,13 +53,15 @@ func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) {
 		data:       dataPtr,
 		ringReader: newRingReader(consPos, prodPos, data),
 	}
-	runtime.SetFinalizer(ring, (*ringbufEventRing).Close)
+	ring.cleanup = runtime.AddCleanup(ring, func(*byte) {
+		efw.EbpfRingBufferMapUnmapBuffer(fd.Int(), consPtr, prodPtr, dataPtr)
+	}, nil)
 
 	return ring, nil
 }
 
 func (ring *ringbufEventRing) Close() error {
-	runtime.SetFinalizer(ring, nil)
+	ring.cleanup.Stop()
 
 	return errors.Join(
 		efw.EbpfRingBufferMapUnmapBuffer(ring.mapFd.Int(), ring.cons, ring.prod, ring.data),
diff --git a/variable_test.go b/variable_test.go
index e6f7d3a0c..e740ef8d1 100644
--- a/variable_test.go
+++ b/variable_test.go
@@ -6,6 +6,7 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
+	"unsafe"
 
 	"github.com/go-quicktest/qt"
 
@@ -291,14 +292,11 @@ func TestVariablePointerGC(t *testing.T) {
 	var obj obj_s
 	qt.Assert(t, qt.IsNil(loadAndAssign(t, spec, &obj, nil)))
 
-	// Set finalizer on obj to get notified when it is collected.
+	// Set cleanup on obj to get notified when it is collected.
 	ogc := make(chan struct{})
-	runtime.SetFinalizer(&obj, func(*obj_s) {
+	runtime.AddCleanup(&obj, func(*byte) {
 		close(ogc)
-	})
-
-	// Set finalizer on the last byte of the Memory to get notified when it is
-	// collected.
+	}, nil)
 	mem, err := obj.AtomicMap.unsafeMemory()
 	qt.Assert(t, qt.IsNil(err))
 	obj.AtomicMap.Close()
@@ -308,14 +306,17 @@ func TestVariablePointerGC(t *testing.T) {
 	go func() {
 		select {
 		case <-mgc:
-			panic("memory finalizer ran unexpectedly")
+			panic("memory cleanup ran unexpectedly")
 		case <-cancel:
 			return
 		}
 	}()
-	runtime.SetFinalizer(&mem.b[len(mem.b)-1], func(p *byte) {
+
+	// Set cleanup on the Memory's backing array to get notified when it is
+	// collected.
+	runtime.AddCleanup(unsafe.SliceData(mem.b), func(*byte) {
 		close(mgc)
-	})
+	}, nil)
 
 	// Pull out Program handle and Variable pointer so reference to obj is
 	// dropped.
@@ -342,7 +343,11 @@ func TestVariablePointerGC(t *testing.T) {
 	close(cancel)
 	runtime.KeepAlive(a32)
 
-	// Another GC cycle to trigger collecting the backing array.
+	// More GC cycles to collect the backing array. As long as the unsafe memory
+	// implementation is still on SetFinalizer, this needs multiple cycles to
+	// work, since finalizers can resurrect objects. 3 GCs seems to work reliably.
+	runtime.GC()
+	runtime.GC()
 	runtime.GC()
 
 	// Wait for backing array to be finalized.
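
For reference, the pattern that all of the converted call sites share can be
condensed into the following self-contained sketch. The tempFile type and its
helpers are hypothetical, invented for illustration; only runtime.AddCleanup,
runtime.Cleanup and Cleanup.Stop are real API, available since Go 1.24. The
crucial difference from runtime.SetFinalizer is that the cleanup function must
not capture the object it is attached to, so the raw resource is passed as the
cleanup argument instead, and the returned runtime.Cleanup handle is stored so
that an explicit Close can cancel it.

package main

import (
	"fmt"
	"os"
	"runtime"
)

// tempFile owns an *os.File which is released either by an explicit Close
// or, if the owner leaks, by the cleanup attached in newTempFile.
// (Hypothetical example type, not part of the library.)
type tempFile struct {
	f       *os.File
	cleanup runtime.Cleanup
}

func newTempFile() (*tempFile, error) {
	f, err := os.CreateTemp("", "cleanup-demo-*")
	if err != nil {
		return nil, err
	}

	t := &tempFile{f: f}
	// Attach the cleanup to the owner t, but pass the resource f as the
	// argument: capturing t in the closure would keep it reachable forever
	// and the cleanup would never run.
	t.cleanup = runtime.AddCleanup(t, func(f *os.File) {
		name := f.Name()
		_ = f.Close()
		_ = os.Remove(name)
	}, f)
	return t, nil
}

func (t *tempFile) Close() error {
	// Stop the pending cleanup first, mirroring the patches above, so an
	// explicit Close never races with a GC-triggered release.
	t.cleanup.Stop()
	name := t.f.Name()
	err := t.f.Close()
	_ = os.Remove(name)
	return err
}

func main() {
	t, err := newTempFile()
	if err != nil {
		panic(err)
	}
	fmt.Println("created", t.f.Name())
	fmt.Println("closed:", t.Close())
}

Unlike a finalizer, the handle returned by AddCleanup must be stored
explicitly if Close is expected to cancel it, which is why the patches add a
cleanup field next to each wrapped resource.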