feat: add graceful power off support to QEMU VM launcher

The QEMU VM launcher's /poweroff HTTP endpoint now accepts an optional grace-period query parameter (Go duration format, e.g. "5m"). When set, it sends an ACPI power button event via the QEMU monitor socket instead of immediately killing the process, allowing the guest OS to shut down cleanly. If the guest does not shut down within the grace period, the process is force-killed as a fallback.

Without the parameter, the behavior is unchanged (immediate kill).

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
This commit is contained in:
Utku Ozdemir 2026-03-26 15:37:17 +01:00
parent 3400059ccf
commit bed2bd414e
No known key found for this signature in database
GPG Key ID: DBD13117B0A14E93
4 changed files with 84 additions and 8 deletions

View File

@ -6,6 +6,7 @@ package qemu
import (
"sync"
"time"
"github.com/siderolabs/talos/pkg/provision/providers/vm"
)
@ -34,6 +35,7 @@ type Controller struct {
state PowerState
forcePXEBoot bool
gracePeriod time.Duration
commandsCh chan VMCommand
}
@ -66,6 +68,11 @@ func (c *Controller) PowerOn() error {
// PowerOff implements vm.Controller interface.
//
// It requests a power off with a zero grace period by delegating to
// PowerOffWithGracePeriod.
func (c *Controller) PowerOff() error {
	const noGracePeriod time.Duration = 0

	return c.PowerOffWithGracePeriod(noGracePeriod)
}
// PowerOffWithGracePeriod implements vm.Controller interface.
func (c *Controller) PowerOffWithGracePeriod(gracePeriod time.Duration) error {
c.mu.Lock()
if c.state == PoweredOff {
@ -75,6 +82,7 @@ func (c *Controller) PowerOff() error {
}
c.state = PoweredOff
c.gracePeriod = gracePeriod
c.mu.Unlock()
c.commandsCh <- VMCommandStop
@ -82,10 +90,20 @@ func (c *Controller) PowerOff() error {
return nil
}
// GracePeriod returns the grace period for the current power off operation.
//
// The value is read while holding the controller mutex, so it is safe to call
// concurrently with the methods that update it.
func (c *Controller) GracePeriod() time.Duration {
	c.mu.Lock()
	period := c.gracePeriod
	c.mu.Unlock()

	return period
}
// Reboot implements vm.Controller interface.
func (c *Controller) Reboot() error {
c.mu.Lock()
c.gracePeriod = 0
if c.state == PoweredOff {
c.state = PoweredOn

View File

@ -408,7 +408,26 @@ func launchVM(config *LaunchConfig) error {
return nil
case command := <-config.controller.CommandsCh():
if command == VMCommandStop {
fmt.Fprintf(os.Stderr, "exiting VM as stop command via API was received\n")
gracePeriod := config.controller.GracePeriod()
if gracePeriod > 0 {
fmt.Fprintf(os.Stderr, "gracefully shutting down VM via QEMU monitor (timeout %s)\n", gracePeriod)
if err := sendMonitorCommand(config.MonitorPath, "system_powerdown"); err != nil {
fmt.Fprintf(os.Stderr, "failed to send system_powerdown: %s, falling back to kill\n", err)
} else {
select {
case err := <-done:
if err != nil {
return fmt.Errorf("process exited with error %s", err)
}
return nil
case <-time.After(gracePeriod):
fmt.Fprintf(os.Stderr, "graceful shutdown timed out, killing VM\n")
}
}
}
if err := cmd.Process.Kill(); err != nil {
return fmt.Errorf("failed to kill process %w", err)
@ -422,6 +441,24 @@ func launchVM(config *LaunchConfig) error {
}
}
// sendMonitorCommand connects to the QEMU monitor unix socket at monitorPath
// and writes command to it, terminated by a newline.
//
// Connecting and writing share a single five-second deadline. No reply is read
// from the monitor; the connection is closed before returning.
//
// It returns an error if the socket cannot be dialed, the deadline cannot be
// applied, or the command cannot be written in time.
func sendMonitorCommand(monitorPath, command string) error {
	// One deadline bounds both the dial and the write: without a write
	// deadline, a monitor that never drains its socket could block the caller
	// indefinitely.
	deadline := time.Now().Add(5 * time.Second)

	ctx, cancel := context.WithDeadline(context.Background(), deadline)
	defer cancel()

	conn, err := (&net.Dialer{}).DialContext(ctx, "unix", monitorPath)
	if err != nil {
		return fmt.Errorf("failed to connect to QEMU monitor: %w", err)
	}

	defer conn.Close() //nolint:errcheck

	if err = conn.SetWriteDeadline(deadline); err != nil {
		return fmt.Errorf("failed to set write deadline: %w", err)
	}

	if _, err = fmt.Fprintf(conn, "%s\n", command); err != nil {
		return fmt.Errorf("failed to send command: %w", err)
	}

	return nil
}
// Launch a control process around qemu VM manager.
//
// This function is invoked from 'talosctl qemu-launch' hidden command

View File

@ -4,10 +4,13 @@
package vm
import "time"
// Controller interface should be implemented by the VM to be controlled via the API.
type Controller interface {
PowerOn() error
PowerOff() error
PowerOffWithGracePeriod(gracePeriod time.Duration) error
Reboot() error
PXEBootOnce() error
Status() Status

View File

@ -12,9 +12,11 @@ import (
"io"
"net/http"
"net/netip"
"net/url"
"os"
"os/signal"
"syscall"
"time"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/provision/internal/inmemhttp"
@ -44,7 +46,7 @@ func ConfigureSignals() chan os.Signal {
return c
}
func httpPostWrapper(f func() error) http.Handler {
func httpPostWrapper(f func(url.Values) error) http.Handler {
return http.HandlerFunc(
func(w http.ResponseWriter, req *http.Request) {
if req.Body != nil {
@ -58,7 +60,7 @@ func httpPostWrapper(f func() error) http.Handler {
return
}
err := f()
err := f(req.URL.Query())
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
@ -110,23 +112,39 @@ func NewHTTPServer(ctx context.Context, gatewayAddr netip.Addr, port int, config
if controller != nil {
for _, method := range []struct {
pattern string
f func() error
f func(url.Values) error
}{
{
pattern: "/poweron",
f: controller.PowerOn,
f: func(_ url.Values) error { return controller.PowerOn() },
},
{
pattern: "/poweroff",
f: controller.PowerOff,
f: func(q url.Values) error {
raw := q.Get("grace-period")
if raw == "" {
return controller.PowerOff()
}
d, err := time.ParseDuration(raw)
if err != nil {
return fmt.Errorf("invalid grace-period: %w", err)
}
if d < 0 {
return fmt.Errorf("invalid grace-period: must be non-negative")
}
return controller.PowerOffWithGracePeriod(d)
},
},
{
pattern: "/reboot",
f: controller.Reboot,
f: func(_ url.Values) error { return controller.Reboot() },
},
{
pattern: "/pxeboot",
f: controller.PXEBootOnce,
f: func(_ url.Values) error { return controller.PXEBootOnce() },
},
} {
httpServer.AddHandler(method.pattern, httpPostWrapper(method.f))