From 030d256ea00e53a4903cae0010995a0760df1be4 Mon Sep 17 00:00:00 2001 From: Adam Thomason Date: Wed, 23 Jul 2025 11:14:17 +0100 Subject: [PATCH 1/2] Fix race condition with daemonized jailer in startVMM When using jailer with Daemonize: true, the jailer parent process exits immediately after forking the child firecracker process. This caused a race condition where the SDK's process monitoring goroutine would detect the parent exit and send to errCh before waitForSocket() could establish the socket connection, causing startVMM to fail prematurely. This change modifies the process monitoring logic to treat a clean exit (status=0) of a daemonized jailer parent as expected behavior rather than an error condition. The goroutine now returns early in this case, allowing waitForSocket() and other initialization logic to complete normally. The actual firecracker child process continues running in daemon mode and creates the API socket as expected. Signed-off-by: Adam Thomason --- machine.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/machine.go b/machine.go index 341e935a..93b5dc88 100644 --- a/machine.go +++ b/machine.go @@ -588,6 +588,16 @@ func (m *Machine) startVMM(ctx context.Context) error { errCh := make(chan error) go func() { waitErr := m.cmd.Wait() + + // If using daemonized jailer and parent exits cleanly, + // this is expected behavior. Don't treat it as an error. + // We return immediately and allow subsequent functions + // (such as waitForSocket) to send any errors to channels. + if m.Cfg.JailerCfg != nil && m.Cfg.JailerCfg.Daemonize && waitErr == nil { + m.logger.Debugf("jailer parent exited (expected for daemonized mode)") + return + } + if waitErr != nil { m.logger.Warnf("firecracker exited: %s", waitErr.Error()) } else { @@ -606,7 +616,6 @@ func (m *Machine) startVMM(ctx context.Context) error { // second one never ends as it tries to read from empty channel. 
close(errCh) close(m.cleanupCh) - }() m.setupSignals() From 9e975a7a0bbed2da539ef273a202015d34cceae7 Mon Sep 17 00:00:00 2001 From: Adam Thomason Date: Thu, 24 Jul 2025 10:38:27 +0100 Subject: [PATCH 2/2] Implemented a new shutdownCh on the Machine struct; closing it from the Shutdown method lets the process-monitoring goroutine in startVMM continue when using a daemonized jailer process. Signed-off-by: Adam Thomason --- machine.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/machine.go b/machine.go index 93b5dc88..e67b1f82 100644 --- a/machine.go +++ b/machine.go @@ -268,6 +268,8 @@ type Machine struct { startOnce sync.Once // exitCh is a channel which gets closed when the VMM exits exitCh chan struct{} + // shutdownCh is a channel which gets closed when the VM is shut down + shutdownCh chan struct{} // fatalErr records an error that either stops or prevent starting the VMM fatalErr error @@ -360,8 +362,9 @@ func configureBuilder(builder VMCommandBuilder, cfg Config) VMCommandBuilder { // provided Config. func NewMachine(ctx context.Context, cfg Config, opts ...Opt) (*Machine, error) { m := &Machine{ - exitCh: make(chan struct{}), - cleanupCh: make(chan struct{}), + exitCh: make(chan struct{}), + shutdownCh: make(chan struct{}), + cleanupCh: make(chan struct{}), } if cfg.VMID == "" { @@ -460,6 +463,9 @@ func (m *Machine) Start(ctx context.Context) error { // Shutdown requests a clean shutdown of the VM by sending CtrlAltDelete on the virtual keyboard func (m *Machine) Shutdown(ctx context.Context) error { m.logger.Debug("Called machine.Shutdown()") + + close(m.shutdownCh) + if runtime.GOARCH != "arm64" { return m.sendCtrlAltDel(ctx) } else { @@ -591,11 +597,10 @@ func (m *Machine) startVMM(ctx context.Context) error { // If using daemonized jailer and parent exits cleanly, // this is expected behavior. Don't treat it as an error. - // We return immediately and allow subsequent functions - // (such as waitForSocket) to send any errors to channels. 
+ // We instead wait for closure of shutdownCh from m.Shutdown(). if m.Cfg.JailerCfg != nil && m.Cfg.JailerCfg.Daemonize && waitErr == nil { m.logger.Debugf("jailer parent exited (expected for daemonized mode)") - return + <-m.shutdownCh } if waitErr != nil {