Skip to content

Commit c85e72c

Browse files
authored
Merge pull request #3979 from norio-nomura/ssh-over-vsock-on-vz
vz: add SSH over AF_VSOCK
2 parents 82f24d7 + 7473c1e commit c85e72c

File tree

7 files changed

+247
-16
lines changed

7 files changed

+247
-16
lines changed

hack/test-templates.sh

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,15 @@ declare -A CHECKS=(
6161
["set-user"]=""
6262
["preserve-env"]="1"
6363
["static-port-forwards"]=""
64+
["ssh-over-vsock"]=""
6465
)
6566

6667
case "$NAME" in
6768
"default")
6869
# CI failure:
6970
# "[hostagent] failed to confirm whether /c/Users/runneradmin [remote] is successfully mounted"
7071
[ "${OS_HOST}" = "Msys" ] && CHECKS["mount-home"]=
72+
[ "${OS_HOST}" = "Darwin" ] && CHECKS["ssh-over-vsock"]="1"
7173
;;
7274
"alpine"*)
7375
WARNING "Alpine does not support systemd"
@@ -341,14 +343,46 @@ if [[ -n ${CHECKS["preserve-env"]} ]]; then
341343
"${scriptdir}"/test-preserve-env.sh "$NAME"
342344
fi
343345

346+
if [[ -n ${CHECKS["ssh-over-vsock"]} ]]; then
347+
if [[ "$(limactl ls "${NAME}" --yq .vmType)" == "vz" ]]; then
348+
INFO "Testing SSH over vsock"
349+
set -x
350+
INFO "Testing LIMA_SSH_OVER_VSOCK=true environment"
351+
limactl stop "${NAME}"
352+
if ! LIMA_SSH_OVER_VSOCK=true limactl start "${NAME}" 2>&1 | grep -i "started vsock forwarder"; then
353+
set +x
354+
diagnose "${NAME}"
355+
ERROR "LIMA_SSH_OVER_VSOCK=true did not enable vsock forwarder"
356+
exit 1
357+
fi
358+
INFO 'Testing LIMA_SSH_OVER_VSOCK="" environment'
359+
limactl stop "${NAME}"
360+
if ! LIMA_SSH_OVER_VSOCK="" limactl start "${NAME}" 2>&1 | grep -i "started vsock forwarder"; then
361+
set +x
362+
diagnose "${NAME}"
363+
ERROR "LIMA_SSH_OVER_VSOCK= did not enable vsock forwarder"
364+
exit 1
365+
fi
366+
INFO "Testing LIMA_SSH_OVER_VSOCK=false environment"
367+
limactl stop "${NAME}"
368+
if ! LIMA_SSH_OVER_VSOCK=false limactl start "${NAME}" 2>&1 | grep -i "skipping detection of SSH server on vsock port"; then
369+
set +x
370+
diagnose "${NAME}"
371+
ERROR "LIMA_SSH_OVER_VSOCK=false did not disable vsock forwarder"
372+
exit 1
373+
fi
374+
set +x
375+
fi
376+
fi
377+
344378
# Use GHCR to avoid hitting Docker Hub rate limit
345379
nginx_image="ghcr.io/stargz-containers/nginx:1.19-alpine-org"
346380
alpine_image="ghcr.io/containerd/alpine:3.14.0"
347381

348382
if [[ -n ${CHECKS["container-engine"]} ]]; then
349383
sudo=""
350384
# Currently WSL2 machines only support privileged engine. This requirement might be lifted in the future.
351-
if [[ "$(limactl ls --json "${NAME}" | jq -r .vmType)" == "wsl2" ]]; then
385+
if [[ "$(limactl ls "${NAME}" --yq .vmType)" == "wsl2" ]]; then
352386
sudo="sudo"
353387
fi
354388
INFO "Run a nginx container with port forwarding 127.0.0.1:8080"
@@ -428,7 +462,7 @@ if [[ -n ${CHECKS["port-forwards"]} ]]; then
428462
sudo="sudo"
429463
fi
430464
# Currently WSL2 machines only support privileged engine. This requirement might be lifted in the future.
431-
if [[ "$(limactl ls --json "${NAME}" | jq -r .vmType)" == "wsl2" ]]; then
465+
if [[ "$(limactl ls "${NAME}" --yq .vmType)" == "wsl2" ]]; then
432466
sudo="sudo"
433467
fi
434468
limactl shell "$NAME" $sudo $CONTAINER_ENGINE info
@@ -613,7 +647,7 @@ if [[ -n ${CHECKS["clone"]} ]]; then
613647
limactl start "$NAME"
614648
fi
615649

616-
if [[ $NAME == "fedora" && "$(limactl ls --json "$NAME" | jq -r .vmType)" == "vz" ]]; then
650+
if [[ $NAME == "fedora" && "$(limactl ls "${NAME}" --yq .vmType)" == "vz" ]]; then
617651
"${scriptdir}"/test-selinux.sh "$NAME"
618652
fi
619653

pkg/driver/vz/vm_darwin.go

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,8 @@ func startVM(ctx context.Context, inst *limatype.Instance, sshLocalPort int) (*v
7272

7373
errCh := make(chan error)
7474

75-
filesToRemove := make(map[string]struct{})
76-
defer func() {
77-
for f := range filesToRemove {
78-
_ = os.RemoveAll(f)
79-
}
80-
}()
75+
waitSSHLocalPortAccessible := make(chan struct{})
76+
defer close(waitSSHLocalPortAccessible)
8177
go func() {
8278
// Handle errors via errCh and handle stop vm during context close
8379
defer func() {
@@ -105,13 +101,36 @@ func startVM(ctx context.Context, inst *limatype.Instance, sshLocalPort int) (*v
105101
logrus.Errorf("error writing to pid fil %q", pidFile)
106102
errCh <- err
107103
}
108-
filesToRemove[pidFile] = struct{}{}
109104
logrus.Info("[VZ] - vm state change: running")
110105

111-
err := usernetClient.ConfigureDriver(ctx, inst, sshLocalPort)
106+
usernetSSHLocalPort := sshLocalPort
107+
useSSHOverVsock := true
108+
if envVar := os.Getenv("LIMA_SSH_OVER_VSOCK"); envVar != "" {
109+
b, err := strconv.ParseBool(envVar)
110+
if err != nil {
111+
logrus.WithError(err).Warnf("invalid LIMA_SSH_OVER_VSOCK value %q", envVar)
112+
} else {
113+
useSSHOverVsock = b
114+
}
115+
}
116+
if !useSSHOverVsock {
117+
logrus.Info("LIMA_SSH_OVER_VSOCK is false, skipping detection of SSH server on vsock port")
118+
} else if err := usernetClient.WaitOpeningSSHPort(ctx, inst); err == nil {
119+
hostAddress := net.JoinHostPort(inst.SSHAddress, strconv.Itoa(usernetSSHLocalPort))
120+
if err := wrapper.startVsockForwarder(ctx, 22, hostAddress); err == nil {
121+
logrus.Infof("Detected SSH server is listening on the vsock port; changed %s to proxy for the vsock port", hostAddress)
122+
usernetSSHLocalPort = 0 // disable gvisor ssh port forwarding
123+
} else {
124+
logrus.WithError(err).Warn("Failed to detect SSH server on vsock port, falling back to usernet forwarder")
125+
}
126+
} else {
127+
logrus.WithError(err).Warn("Failed to wait for the guest SSH server to become available, falling back to usernet forwarder")
128+
}
129+
err := usernetClient.ConfigureDriver(ctx, inst, usernetSSHLocalPort)
112130
if err != nil {
113131
errCh <- err
114132
}
133+
waitSSHLocalPortAccessible <- struct{}{}
115134
case vz.VirtualMachineStateStopped:
116135
logrus.Info("[VZ] - vm state change: stopped")
117136
wrapper.mu.Lock()
@@ -128,7 +147,7 @@ func startVM(ctx context.Context, inst *limatype.Instance, sshLocalPort int) (*v
128147
}
129148
}
130149
}()
131-
150+
<-waitSSHLocalPortAccessible
132151
return wrapper, errCh, err
133152
}
134153

pkg/driver/vz/vsock_forwarder.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//go:build darwin && !no_vz
2+
3+
// SPDX-FileCopyrightText: Copyright The Lima Authors
4+
// SPDX-License-Identifier: Apache-2.0
5+
6+
package vz
7+
8+
import (
9+
"context"
10+
"errors"
11+
"net"
12+
13+
"github.com/containers/gvisor-tap-vsock/pkg/tcpproxy"
14+
"github.com/sirupsen/logrus"
15+
)
16+
17+
func (m *virtualMachineWrapper) startVsockForwarder(ctx context.Context, vsockPort uint32, hostAddress string) error {
18+
// Test if the vsock port is open
19+
conn, err := m.dialVsock(ctx, vsockPort)
20+
if err != nil {
21+
return err
22+
}
23+
conn.Close()
24+
// Start listening on localhost:hostPort and forward to vsock:vsockPort
25+
_, _, err = net.SplitHostPort(hostAddress)
26+
if err != nil {
27+
return err
28+
}
29+
var lc net.ListenConfig
30+
l, err := lc.Listen(ctx, "tcp", hostAddress)
31+
if err != nil {
32+
return err
33+
}
34+
go func() {
35+
<-ctx.Done()
36+
l.Close()
37+
}()
38+
logrus.Infof("Started vsock forwarder: %s -> vsock:%d on VM", hostAddress, vsockPort)
39+
go func() {
40+
defer l.Close()
41+
for {
42+
conn, err := l.Accept()
43+
if err != nil {
44+
if errors.Is(err, net.ErrClosed) {
45+
return
46+
}
47+
logrus.WithError(err).Errorf("vsock forwarder accept error: %v", err)
48+
} else {
49+
p := tcpproxy.DialProxy{
50+
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
51+
return m.dialVsock(ctx, vsockPort)
52+
},
53+
}
54+
go p.HandleConn(conn)
55+
}
56+
select {
57+
case <-ctx.Done():
58+
return
59+
default:
60+
continue
61+
}
62+
}
63+
}()
64+
return nil
65+
}
66+
67+
func (m *virtualMachineWrapper) dialVsock(_ context.Context, port uint32) (conn net.Conn, err error) {
68+
for _, socket := range m.SocketDevices() {
69+
conn, err = socket.Connect(port)
70+
if err == nil {
71+
return conn, nil
72+
}
73+
}
74+
return nil, err
75+
}

pkg/networks/usernet/client.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,11 @@ func (c *Client) ConfigureDriver(ctx context.Context, inst *limatype.Instance, s
3838
if err != nil {
3939
return err
4040
}
41-
err = c.ResolveAndForwardSSH(ipAddress, sshLocalPort)
42-
if err != nil {
43-
return err
41+
if sshLocalPort != 0 {
42+
err = c.ResolveAndForwardSSH(ipAddress, sshLocalPort)
43+
if err != nil {
44+
return err
45+
}
4446
}
4547
hosts := inst.Config.HostResolver.Hosts
4648
if hosts == nil {
@@ -127,6 +129,30 @@ func (c *Client) Leases(ctx context.Context) (map[string]string, error) {
127129
return leases, nil
128130
}
129131

132+
// WaitOpeningSSHPort Wait until the guest ssh server is available.
133+
func (c *Client) WaitOpeningSSHPort(ctx context.Context, inst *limatype.Instance) error {
134+
// This timeout is based on the maximum wait time for the first essential requirement.
135+
timeoutSeconds := 600
136+
ctx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSeconds)*time.Second)
137+
defer cancel()
138+
macAddress := limayaml.MACAddress(inst.Dir)
139+
ipAddr, err := c.ResolveIPAddress(ctx, macAddress)
140+
if err != nil {
141+
return err
142+
}
143+
// -1 avoids both sides timing out simultaneously.
144+
u := fmt.Sprintf("%s/extension/wait_port?ip=%s&port=22&timeout=%d", c.base, ipAddr, timeoutSeconds-1)
145+
res, err := httpclientutil.Get(ctx, c.client, u)
146+
if err != nil {
147+
return err
148+
}
149+
defer res.Body.Close()
150+
if res.StatusCode != http.StatusOK {
151+
return errors.New("failed to wait for SSH port")
152+
}
153+
return nil
154+
}
155+
130156
func NewClientByName(nwName string) *Client {
131157
endpointSock, err := Sock(nwName, EndpointSock)
132158
if err != nil {

pkg/networks/usernet/gvproxy.go

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net/http"
1313
"os"
1414
"runtime"
15+
"strconv"
1516
"strings"
1617
"time"
1718

@@ -103,7 +104,8 @@ func run(ctx context.Context, g *errgroup.Group, configuration *types.Configurat
103104
if err != nil {
104105
return err
105106
}
106-
httpServe(ctx, g, ln, vn.Mux())
107+
108+
httpServe(ctx, g, ln, muxWithExtension(vn))
107109

108110
if opts.QemuSocket != "" {
109111
err = listenQEMU(ctx, vn)
@@ -239,6 +241,58 @@ func httpServe(ctx context.Context, g *errgroup.Group, ln net.Listener, mux http
239241
})
240242
}
241243

244+
func muxWithExtension(n *virtualnetwork.VirtualNetwork) *http.ServeMux {
245+
m := n.Mux()
246+
m.HandleFunc("/extension/wait_port", func(w http.ResponseWriter, r *http.Request) {
247+
ip := r.URL.Query().Get("ip")
248+
if net.ParseIP(ip) == nil {
249+
msg := fmt.Sprintf("invalid ip address: %s", ip)
250+
http.Error(w, msg, http.StatusBadRequest)
251+
return
252+
}
253+
port16, err := strconv.ParseUint(r.URL.Query().Get("port"), 10, 16)
254+
if err != nil {
255+
http.Error(w, err.Error(), http.StatusBadRequest)
256+
return
257+
}
258+
port := uint16(port16)
259+
addr := fmt.Sprintf("%s:%d", ip, port)
260+
261+
timeoutSeconds := 10
262+
if timeoutString := r.URL.Query().Get("timeout"); timeoutString != "" {
263+
timeout16, err := strconv.ParseUint(timeoutString, 10, 16)
264+
if err != nil {
265+
http.Error(w, err.Error(), http.StatusBadRequest)
266+
return
267+
}
268+
timeoutSeconds = int(timeout16)
269+
}
270+
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSeconds)*time.Second)
271+
defer cancel()
272+
// Wait until the port is available.
273+
for {
274+
conn, err := n.DialContextTCP(ctx, addr)
275+
if err == nil {
276+
conn.Close()
277+
logrus.Debugf("Port is available on %s", addr)
278+
w.WriteHeader(http.StatusOK)
279+
break
280+
}
281+
select {
282+
case <-ctx.Done():
283+
msg := fmt.Sprintf("timed out waiting for port to become available on %s", addr)
284+
logrus.Warn(msg)
285+
http.Error(w, msg, http.StatusRequestTimeout)
286+
return
287+
default:
288+
}
289+
logrus.Debugf("Waiting for port to become available on %s", addr)
290+
time.Sleep(1 * time.Second)
291+
}
292+
})
293+
return m
294+
}
295+
242296
func searchDomains() []string {
243297
if runtime.GOOS != "windows" {
244298
return resolveSearchDomain("/etc/resolv.conf")

website/content/en/docs/config/environment-variables.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ This page documents the environment variables used in Lima.
106106
lima
107107
```
108108

109+
### `LIMA_SSH_OVER_VSOCK`
110+
- **Description**: Specifies whether to use vsock for the SSH connection instead of port forwarding.
111+
- **Default**: `true` (since v2.0.0)
112+
- **Usage**:
113+
```sh
114+
export LIMA_SSH_OVER_VSOCK=true
115+
```
116+
- **Note**: This variable takes effect only if the VM is VZ-based and its systemd is v256 or later (e.g. Ubuntu 24.10+).
117+
109118
### `LIMA_SSH_PORT_FORWARDER`
110119

111120
- **Description**: Specifies to use the SSH port forwarder (slow) instead of gRPC (fast, previously unstable)

website/content/en/docs/config/port.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,20 @@ LIMA_SSH_PORT_FORWARDER=true limactl start
3636
- Doesn't support UDP based port forwarding
3737
- Spawns child process on host for running SSH master.
3838

39+
#### SSH over AF_VSOCK
40+
41+
| ⚡ Requirement | Lima >= 2.0 |
42+
|---------------|-------------|
43+
44+
If the VM is VZ-based and its systemd is v256 or later (e.g. Ubuntu 24.10+), Lima uses AF_VSOCK for communication between host and guest.
45+
SSH-based port forwarding is much faster over AF_VSOCK than over traditional virtual-network-based port forwarding.
46+
47+
To disable this feature, set `LIMA_SSH_OVER_VSOCK` to `false`:
48+
49+
```bash
50+
export LIMA_SSH_OVER_VSOCK=false
51+
```
52+
3953
### Using GRPC
4054

4155
| ⚡ Requirement | Lima >= 1.0 |

0 commit comments

Comments
 (0)