From d2683157ec3cd76254736e59a8d0afa7d2d38aa1 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 10 Mar 2026 10:37:25 +0800
Subject: [PATCH 1/2] fix(tunnel): add timeout to keepAliveLoop to detect dead
 connections after sleep/wake

SendRequest blocks indefinitely on a dead TCP connection (e.g. after OS
sleep/wake where no RST/FIN is received). Run it in a goroutine and use
a select with time.After(KeepAlive) as the deadline. On timeout, close
the SSH connection so that blocked OpenChannel calls are also unblocked
and the client can reconnect.

Also add unit tests for keepAliveLoop and an E2E test with a freezable
TCP proxy that simulates the sleep/wake scenario end-to-end.

Co-authored-by: Claude Sonnet 4.6
---
 share/tunnel/tunnel.go                |  30 +++-
 share/tunnel/tunnel_keepalive_test.go | 130 ++++++++++++++++++
 test/e2e/keepalive_test.go            | 203 ++++++++++++++++++++++++++
 3 files changed, 357 insertions(+), 6 deletions(-)
 create mode 100644 share/tunnel/tunnel_keepalive_test.go
 create mode 100644 test/e2e/keepalive_test.go

diff --git a/share/tunnel/tunnel.go b/share/tunnel/tunnel.go
index f2da628c..11077ec4 100644
--- a/share/tunnel/tunnel.go
+++ b/share/tunnel/tunnel.go
@@ -179,14 +179,32 @@
 	//ping forever
 	for {
 		time.Sleep(t.Config.KeepAlive)
-		_, b, err := sshConn.SendRequest("ping", true, nil)
-		if err != nil {
-			break
+		// SendRequest blocks indefinitely on a dead connection (e.g. after
+		// sleep/wake), so run it in a goroutine and treat no response within
+		// KeepAlive as a failure.
+		type result struct {
+			b   []byte
+			err error
 		}
-		if len(b) > 0 && !bytes.Equal(b, []byte("pong")) {
-			t.Debugf("strange ping response")
-			break
+		ch := make(chan result, 1)
+		go func() {
+			_, b, err := sshConn.SendRequest("ping", true, nil)
+			ch <- result{b, err}
+		}()
+		select {
+		case r := <-ch:
+			if r.err != nil {
+				break
+			}
+			if len(r.b) > 0 && !bytes.Equal(r.b, []byte("pong")) {
+				t.Debugf("strange ping response")
+				break
+			}
+			continue
+		case <-time.After(t.Config.KeepAlive):
+			t.Debugf("ping timeout")
 		}
+		break
 	}
 	//close ssh connection on abnormal ping
 	sshConn.Close()
diff --git a/share/tunnel/tunnel_keepalive_test.go b/share/tunnel/tunnel_keepalive_test.go
new file mode 100644
index 00000000..1be43fa3
--- /dev/null
+++ b/share/tunnel/tunnel_keepalive_test.go
@@ -0,0 +1,130 @@
+package tunnel
+
+import (
+	"net"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/jpillora/chisel/share/cio"
+	"golang.org/x/crypto/ssh"
+)
+
+func newTestTunnel(ka time.Duration) *Tunnel {
+	return New(Config{
+		Logger:    cio.NewLogger("test"),
+		KeepAlive: ka,
+	})
+}
+
+// mockDeadSSHConn simulates an ssh.Conn whose SendRequest blocks indefinitely,
+// as happens when the underlying TCP connection is dead but the OS has not yet
+// detected it (e.g. immediately after a sleep/wake cycle with no RST received).
+type mockDeadSSHConn struct {
+	closed chan struct{}
+}
+
+func (m *mockDeadSSHConn) User() string          { return "" }
+func (m *mockDeadSSHConn) SessionID() []byte     { return nil }
+func (m *mockDeadSSHConn) ClientVersion() []byte { return nil }
+func (m *mockDeadSSHConn) ServerVersion() []byte { return nil }
+func (m *mockDeadSSHConn) RemoteAddr() net.Addr  { return &net.TCPAddr{} }
+func (m *mockDeadSSHConn) LocalAddr() net.Addr   { return &net.TCPAddr{} }
+func (m *mockDeadSSHConn) OpenChannel(string, []byte) (ssh.Channel, <-chan *ssh.Request, error) {
+	return nil, nil, net.ErrClosed
+}
+func (m *mockDeadSSHConn) Wait() error { <-m.closed; return nil }
+func (m *mockDeadSSHConn) Close() error {
+	select {
+	case <-m.closed:
+	default:
+		close(m.closed)
+	}
+	return nil
+}
+
+// SendRequest blocks until Close() is called, simulating a dead TCP connection.
+func (m *mockDeadSSHConn) SendRequest(_ string, _ bool, _ []byte) (bool, []byte, error) {
+	<-m.closed
+	return false, nil, net.ErrClosed
+}
+
+// TestKeepAliveLoopTimeout verifies that keepAliveLoop calls sshConn.Close()
+// when SendRequest does not return within the keepalive interval. This is the
+// sleep/wake scenario where the TCP connection is silently dead.
+func TestKeepAliveLoopTimeout(t *testing.T) {
+	const ka = 50 * time.Millisecond
+
+	mock := &mockDeadSSHConn{closed: make(chan struct{})}
+	tun := newTestTunnel(ka)
+
+	go tun.keepAliveLoop(mock)
+
+	select {
+	case <-mock.closed:
+		// keepAliveLoop detected the dead connection and called sshConn.Close()
+	case <-time.After(5 * ka):
+		t.Fatal("keepAliveLoop did not close dead connection within 5×keepalive (detection is expected within ~2×keepalive)")
+	}
+}
+
+// mockHealthySSHConn simulates a normal ssh.Conn that responds to pings immediately.
+type mockHealthySSHConn struct {
+	closed    chan struct{}
+	pingCount int64 // incremented atomically; keepAliveLoop pings from another goroutine
+}
+
+func (m *mockHealthySSHConn) User() string          { return "" }
+func (m *mockHealthySSHConn) SessionID() []byte     { return nil }
+func (m *mockHealthySSHConn) ClientVersion() []byte { return nil }
+func (m *mockHealthySSHConn) ServerVersion() []byte { return nil }
+func (m *mockHealthySSHConn) RemoteAddr() net.Addr  { return &net.TCPAddr{} }
+func (m *mockHealthySSHConn) LocalAddr() net.Addr   { return &net.TCPAddr{} }
+func (m *mockHealthySSHConn) OpenChannel(string, []byte) (ssh.Channel, <-chan *ssh.Request, error) {
+	return nil, nil, net.ErrClosed
+}
+func (m *mockHealthySSHConn) Wait() error { <-m.closed; return nil }
+func (m *mockHealthySSHConn) Close() error {
+	select {
+	case <-m.closed:
+	default:
+		close(m.closed)
+	}
+	return nil
+}
+func (m *mockHealthySSHConn) SendRequest(_ string, _ bool, _ []byte) (bool, []byte, error) {
+	select {
+	case <-m.closed:
+		return false, nil, net.ErrClosed
+	default:
+		atomic.AddInt64(&m.pingCount, 1)
+		return true, []byte("pong"), nil
+	}
+}
+
+// TestKeepAliveLoopHealthy verifies that keepAliveLoop does NOT close the
+// connection when the remote responds to pings normally.
+func TestKeepAliveLoopHealthy(t *testing.T) {
+	const ka = 30 * time.Millisecond
+
+	mock := &mockHealthySSHConn{closed: make(chan struct{})}
+	tun := newTestTunnel(ka)
+
+	go tun.keepAliveLoop(mock)
+
+	// Let a few ping cycles pass.
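+	// 4×ka leaves room for roughly three ping cycles; the assertion below
+	// only requires two, to tolerate scheduling jitter.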
+	time.Sleep(4 * ka)
+
+	select {
+	case <-mock.closed:
+		t.Fatal("keepAliveLoop closed a healthy connection unexpectedly")
+	default:
+		if n := atomic.LoadInt64(&mock.pingCount); n < 2 {
+			t.Fatalf("expected at least 2 pings, got %d", n)
+		}
+	}
+
+	mock.Close() // clean up
+}
diff --git a/test/e2e/keepalive_test.go b/test/e2e/keepalive_test.go
new file mode 100644
index 00000000..f3b63309
--- /dev/null
+++ b/test/e2e/keepalive_test.go
@@ -0,0 +1,203 @@
+package e2e_test
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"sync"
+	"testing"
+	"time"
+
+	chclient "github.com/jpillora/chisel/client"
+	chserver "github.com/jpillora/chisel/server"
+)
+
+// freezableProxy sits between client and server. When frozen it silently
+// discards all traffic (simulating a dead TCP link after sleep/wake) without
+// closing the connections, so neither side receives a RST or FIN.
+type freezableProxy struct {
+	listener net.Listener
+	target   string
+
+	mu     sync.Mutex
+	frozen bool
+}
+
+func newFreezableProxy(target string) (*freezableProxy, error) {
+	l, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		return nil, err
+	}
+	p := &freezableProxy{listener: l, target: target}
+	go p.serve()
+	return p, nil
+}
+
+func (p *freezableProxy) Addr() string { return p.listener.Addr().String() }
+
+func (p *freezableProxy) Freeze()   { p.mu.Lock(); p.frozen = true; p.mu.Unlock() }
+func (p *freezableProxy) Unfreeze() { p.mu.Lock(); p.frozen = false; p.mu.Unlock() }
+
+func (p *freezableProxy) serve() {
+	for {
+		src, err := p.listener.Accept()
+		if err != nil {
+			return
+		}
+		dst, err := net.Dial("tcp", p.target)
+		if err != nil {
+			src.Close()
+			continue
+		}
+		go p.pipe(src, dst)
+		go p.pipe(dst, src)
+	}
+}
+
+func (p *freezableProxy) pipe(dst, src net.Conn) {
+	// close both ends on error so the peer pipe goroutine unblocks too
+	defer dst.Close()
+	defer src.Close()
+	buf := make([]byte, 32*1024)
+	for {
+		n, err := src.Read(buf)
+		if err != nil {
+			return
+		}
+		p.mu.Lock()
+		frozen := p.frozen
+		p.mu.Unlock()
+		if frozen {
+			// silently discard — no RST, no FIN, just a black hole
+			continue
+		}
+		if _, err := dst.Write(buf[:n]); err != nil {
+			return
+		}
+	}
+}
+
+func (p *freezableProxy) Close() { p.listener.Close() }
+
+// TestKeepAliveReconnectAfterFreeze verifies that when the network goes silent
+// (packets silently dropped, simulating sleep/wake) the keepalive timeout
+// triggers reconnection and port forwarding recovers automatically.
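+// Expected timeline with ka = 200ms: the link freezes, the next ping is
+// black-holed, and keepAliveLoop times out after a further ka, so detection
+// completes within ~2×ka; the client then redials through the unfrozen proxy
+// (retry interval capped at 500ms) and the forwarded port works again.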
+func TestKeepAliveReconnectAfterFreeze(t *testing.T) { + const ka = 200 * time.Millisecond + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // --- file server (the tunnelled endpoint) --- + filePort := availablePort() + fileAddr := "127.0.0.1:" + filePort + fl, err := net.Listen("tcp", fileAddr) + if err != nil { + t.Fatal(err) + } + defer fl.Close() + go func() { + for { + c, err := fl.Accept() + if err != nil { + return + } + go func(c net.Conn) { + defer c.Close() + b, _ := io.ReadAll(c) + c.Write(append(b, '!')) + }(c) + } + }() + + // --- chisel server --- + srv, err := chserver.NewServer(&chserver.Config{}) + if err != nil { + t.Fatal(err) + } + srv.Debug = debug + srvPort := availablePort() + if err := srv.StartContext(ctx, "127.0.0.1", srvPort); err != nil { + t.Fatal(err) + } + defer srv.Close() + + // --- freezable proxy between client and server --- + proxy, err := newFreezableProxy("127.0.0.1:" + srvPort) + if err != nil { + t.Fatal(err) + } + defer proxy.Close() + + // --- chisel client (connects via proxy, so we can freeze the link) --- + tunPort := availablePort() + client, err := chclient.NewClient(&chclient.Config{ + Fingerprint: srv.GetFingerprint(), + Server: "http://" + proxy.Addr(), + Remotes: []string{tunPort + ":" + fileAddr}, + KeepAlive: ka, + MaxRetryCount: -1, + MaxRetryInterval: 500 * time.Millisecond, + }) + if err != nil { + t.Fatal(err) + } + client.Debug = debug + if err := client.Start(ctx); err != nil { + t.Fatal(err) + } + defer client.Close() + + // Wait for initial connection. + time.Sleep(150 * time.Millisecond) + + // Confirm tunnel works before freeze. + if err := tcpEcho(tunPort, "hello"); err != nil { + t.Fatalf("pre-freeze: %v", err) + } + + // Freeze the link — simulates the dead TCP after sleep. + t.Log("freezing proxy (simulating sleep/wake dead link)") + proxy.Freeze() + + // Wait long enough for keepalive to detect the dead connection and for + // the client to reconnect through the now-unfrozen proxy. + // Detection takes at most 2×ka; reconnect takes a bit more. + time.Sleep(ka) + proxy.Unfreeze() + time.Sleep(3 * ka) + + // Port forwarding should work again after reconnection. + if err := tcpEcho(tunPort, "world"); err != nil { + t.Fatalf("post-reconnect: %v", err) + } + t.Log("tunnel recovered successfully after simulated sleep/wake") +} + +// tcpEcho dials localhost:port, sends msg, and expects msg+"!" back. +func tcpEcho(port, msg string) error { + conn, err := net.DialTimeout("tcp", "127.0.0.1:"+port, 3*time.Second) + if err != nil { + return err + } + defer conn.Close() + conn.SetDeadline(time.Now().Add(3 * time.Second)) + if _, err := conn.Write([]byte(msg)); err != nil { + return err + } + conn.(*net.TCPConn).CloseWrite() + b, err := io.ReadAll(conn) + if err != nil { + return err + } + want := msg + "!" 
+	if string(b) != want {
+		return fmt.Errorf("unexpected echo response %q, want %q", b, want)
+	}
+	return nil
+}

From 679076352e31946d5d50c92f48e2db348db034e8 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 10 Mar 2026 11:19:55 +0800
Subject: [PATCH 2/2] ci: add workflow to build and upload binaries on release

Co-authored-by: Claude Sonnet 4.6
---
 .github/workflows/release-binaries.yml | 45 +++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 .github/workflows/release-binaries.yml

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
new file mode 100644
index 00000000..16b4f203
--- /dev/null
+++ b/.github/workflows/release-binaries.yml
@@ -0,0 +1,45 @@
+name: Release Binaries
+on:
+  release:
+    types: [published]
+permissions:
+  contents: write
+jobs:
+  build:
+    name: ${{ matrix.goos }}/${{ matrix.goarch }}${{ matrix.goarm && format('/arm{0}', matrix.goarm) || '' }}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - {goos: linux, goarch: amd64}
+          - {goos: linux, goarch: arm64}
+          - {goos: linux, goarch: arm, goarm: "7"}
+          - {goos: linux, goarch: arm, goarm: "6"}
+          - {goos: linux, goarch: "386"}
+          - {goos: darwin, goarch: amd64}
+          - {goos: darwin, goarch: arm64}
+          - {goos: windows, goarch: amd64}
+          - {goos: windows, goarch: arm64}
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-go@v6
+        with:
+          go-version: stable
+      - name: Build
+        env:
+          GOOS: ${{ matrix.goos }}
+          GOARCH: ${{ matrix.goarch }}
+          GOARM: ${{ matrix.goarm }}
+          CGO_ENABLED: "0"
+        run: |
+          SUFFIX="${{ matrix.goarm && format('v{0}', matrix.goarm) || '' }}"
+          EXT="${{ matrix.goos == 'windows' && '.exe' || '' }}"
+          NAME="chisel_${{ github.ref_name }}_${{ matrix.goos }}_${{ matrix.goarch }}${SUFFIX}${EXT}"
+          go build -trimpath \
+            -ldflags="-s -w -X github.com/jpillora/chisel/share.BuildVersion=${{ github.ref_name }}" \
+            -o "$NAME" .
+          echo "ASSET=$NAME" >> $GITHUB_ENV
+      - name: Upload
+        uses: softprops/action-gh-release@v2
+        with:
+          files: ${{ env.ASSET }}
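-- 
Not part of the patches: a minimal sketch of how to verify them locally,
assuming both are applied on a chisel checkout with a recent Go toolchain.

  # unit tests under the race detector (the healthy-conn mock is pinged
  # concurrently by keepAliveLoop)
  go test -race ./share/tunnel/

  # end-to-end sleep/wake simulation; assumes the e2e package's existing
  # availablePort/debug helpers referenced by the test
  go test -race -run TestKeepAliveReconnectAfterFreeze ./test/e2e/

  # spot-check one cross-compile target from the release matrix
  CGO_ENABLED=0 GOOS=linux GOARCH=arm GOARM=7 go build -trimpath -o /tmp/chisel .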