fix: handle boot failure

Fixes #13083

Signed-off-by: Mateusz Urbanek <mateusz.urbanek@siderolabs.com>
(cherry picked from commit 181584a5f1850f2bfb2a837c0d05bd9e30ee48b5)
This commit is contained in:
Mateusz Urbanek 2026-04-07 22:09:13 +02:00 committed by Andrey Smirnov
parent 6dc97e8aa7
commit 7d7776dcaa
No known key found for this signature in database
GPG Key ID: 322C6F63F594CE7C
10 changed files with 235 additions and 31 deletions

View File

@ -99,8 +99,7 @@ func syncNonVolatileStorageBuffers() {
func handle(ctx context.Context, err error) {
rebootCmd := int(emergency.RebootCmd.Load())
var rebootErr runtime.RebootError
if errors.As(err, &rebootErr) {
if rebootErr, ok := errors.AsType[runtime.RebootError](err); ok {
// not a failure, but wrapped reboot command
rebootCmd = rebootErr.Cmd
@ -195,7 +194,7 @@ func run() error {
}
//nolint:gocyclo
func runEntrypoint(ctx context.Context, c *v1alpha1runtime.Controller) error {
func runEntrypoint(ctx context.Context, c *v1alpha1runtime.Controller) (returnErr error) {
errCh := make(chan error)
var controllerWaitGroup sync.WaitGroup
@ -217,6 +216,29 @@ func runEntrypoint(ctx context.Context, c *v1alpha1runtime.Controller) error {
go runDebugServer(ctx)
// Run emergency volume cleanup on fatal errors before canceling the context,
// so that the COSI controller runtime is still alive and can react to
// volume lifecycle teardown.
defer func() {
if returnErr == nil {
return
}
if _, ok := errors.AsType[runtime.RebootError](returnErr); ok { //nolint:errcheck
// successful reboot/shutdown sequences already performed volume cleanup
return
}
log.Printf("running emergency volume cleanup")
emergencyCtx, emergencyCancel := context.WithTimeout(context.Background(), constants.EmergencyCleanupTimeout)
defer emergencyCancel()
if e := c.Run(emergencyCtx, runtime.SequenceEmergencyVolumeCleanup, nil, runtime.WithTakeover()); e != nil {
log.Printf("WARNING: emergency volume cleanup failed: %s", e)
}
}()
// Schedule service shutdown on any return.
defer system.Services(c.Runtime()).Shutdown(ctx)

View File

@ -35,30 +35,35 @@ const (
SequenceReset
// SequenceReboot is the reboot sequence.
SequenceReboot
// SequenceEmergencyVolumeCleanup is the emergency volume cleanup sequence.
SequenceEmergencyVolumeCleanup
)
const (
boot = "boot"
initialize = "initialize"
install = "install"
shutdown = "shutdown"
upgrade = "upgrade"
stageUpgrade = "stageUpgrade"
maintenanceUpgrade = "maintenanceUpgrade"
reset = "reset"
reboot = "reboot"
noop = "noop"
boot = "boot"
initialize = "initialize"
install = "install"
shutdown = "shutdown"
upgrade = "upgrade"
stageUpgrade = "stageUpgrade"
maintenanceUpgrade = "maintenanceUpgrade"
reset = "reset"
reboot = "reboot"
emergencyVolumeCleanup = "emergencyVolumeCleanup"
noop = "noop"
)
var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
SequenceInitialize: {
SequenceMaintenanceUpgrade: {},
SequenceReboot: {},
SequenceMaintenanceUpgrade: {},
SequenceReboot: {},
SequenceEmergencyVolumeCleanup: {},
},
SequenceBoot: {
SequenceReboot: {},
SequenceReset: {},
SequenceUpgrade: {},
SequenceReboot: {},
SequenceReset: {},
SequenceUpgrade: {},
SequenceEmergencyVolumeCleanup: {},
},
SequenceReboot: {
SequenceReboot: {},
@ -70,7 +75,7 @@ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
// String returns the string representation of a `Sequence`.
func (s Sequence) String() string {
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot}[s]
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot, emergencyVolumeCleanup}[s]
}
// CanTakeOver defines sequences priority.
@ -117,6 +122,8 @@ func ParseSequence(s string) (seq Sequence, err error) {
seq = SequenceReset
case reboot:
seq = SequenceReboot
case emergencyVolumeCleanup:
seq = SequenceEmergencyVolumeCleanup
case noop:
seq = SequenceNoop
default:
@ -154,6 +161,7 @@ type Sequencer interface {
StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase
Upgrade(Runtime, *machine.UpgradeRequest) []Phase
MaintenanceUpgrade(Runtime, *machine.UpgradeRequest) []Phase
EmergencyVolumeCleanup(Runtime) []Phase
}
// EventSequenceStart represents the sequence start event.

View File

@ -417,6 +417,8 @@ func (c *Controller) phases(seq runtime.Sequence, data any) ([]runtime.Phase, er
}
phases = c.s.Reset(c.r, in)
case runtime.SequenceEmergencyVolumeCleanup:
phases = c.s.EmergencyVolumeCleanup(c.r)
case runtime.SequenceNoop:
default:
return nil, fmt.Errorf("sequence not implemented: %q", seq)

View File

@ -68,6 +68,10 @@ func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest)
return m.phases[runtime.SequenceUpgrade]
}
func (m *mockSequencer) EmergencyVolumeCleanup(r runtime.Runtime) []runtime.Phase {
return m.phases[runtime.SequenceEmergencyVolumeCleanup]
}
func (m *mockSequencer) trackCall(name string, doneCh chan struct{}) func(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(seq runtime.Sequence, data any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {

View File

@ -514,6 +514,36 @@ func stopAllPhaselist(r runtime.Runtime, enableKexec bool) PhaseList {
return phases
}
// EmergencyVolumeCleanup is the emergency volume cleanup sequence.
//
// This sequence runs the essential volume cleanup phases with a short timeout
// to properly unmount and tear down volumes before an emergency reboot.
// It is invoked when a fatal error occurs during normal sequence execution.
func (*Sequencer) EmergencyVolumeCleanup(r runtime.Runtime) []runtime.Phase {
phases := PhaseList{}
switch r.State().Platform().Mode() { //nolint:exhaustive
case runtime.ModeContainer:
// no volume cleanup needed in container mode
default:
phases = phases.Append(
"umount",
UnmountPodMounts,
).Append(
"unmountBind",
UnmountSystemDiskBindMounts,
).Append(
"unmountSystem",
UnmountEphemeralPartition,
).Append(
"volumeFinalize",
TeardownVolumeLifecycle,
)
}
return phases
}
func bootPartitionInTargets(targets []runtime.PartitionTarget) bool {
for _, target := range targets {
if target.GetLabel() == constants.BootPartitionLabel {

View File

@ -35,6 +35,8 @@ func unmountWithTimeout(target string, flags int, timeout time.Duration) error {
}
// UnmountAll attempts to unmount all the mounted filesystems via "self" mountinfo.
//
//nolint:gocyclo
func UnmountAll() error {
// timeout in seconds
const timeout = 10
@ -42,7 +44,7 @@ func UnmountAll() error {
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for range timeout {
for iteration := range timeout {
mounts, err := readMountInfo()
if err != nil {
return err
@ -50,6 +52,8 @@ func UnmountAll() error {
failedUnmounts := 0
var failedMountPoints []string
for _, mountInfo := range mounts {
if mountInfo.MountPoint == "" {
continue
@ -63,6 +67,8 @@ func UnmountAll() error {
log.Printf("failed unmounting %s: %s", mountInfo.MountPoint, err)
failedUnmounts++
failedMountPoints = append(failedMountPoints, mountInfo.MountPoint)
}
}
}
@ -71,6 +77,13 @@ func UnmountAll() error {
break
}
// Log mount users on first and last failure to help diagnose busy mounts.
if iteration == 0 || iteration == timeout-1 {
for _, mp := range failedMountPoints {
logMountUsers(log.Printf, mp)
}
}
log.Printf("retrying %d unmount operations...", failedUnmounts)
<-ticker.C

View File

@ -210,6 +210,7 @@ func PseudoLate(printer func(string, ...any)) Managers {
WithTarget("/run"),
WithMountAttributes(unix.MOUNT_ATTR_NOSUID|unix.MOUNT_ATTR_NOEXEC|unix.MOUNT_ATTR_RELATIME),
WithSelinuxLabel(constants.RunSelinuxLabel),
WithRecursiveUnmount(),
WithFsopen(
"tmpfs",
fsopen.WithStringParameter("mode", "0755"),

View File

@ -146,9 +146,17 @@ func (p *Point) Unmount(opts UnmountOptions) error {
return nil
}
return p.retry(func() error {
err := p.retry(func() error {
return SafeUnmount(context.Background(), opts.Printer, p.target, opts.Recursive)
}, true)
if err != nil {
logSubmounts(opts.Printer, p.target)
logMountUsers(opts.Printer, p.target)
return err
}
return nil
}
// IsMounted checks if the mount point is mounted by checking the mount on the target.

View File

@ -5,18 +5,31 @@
package mount
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"golang.org/x/sys/unix"
)
func trySyncMount(target string, printer func(string, ...any)) error {
// open the mountpoint directory to get an fd on the fs
func trySyncMount(target string) error {
// Try the directory path first, then fall back to a file mountpoint.
fd, err := unix.Open(target, unix.O_RDONLY|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open %q: %w", target, err)
if errors.Is(err, unix.ENOTDIR) {
fd, err = unix.Open(target, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open %q as file: %w", target, err)
}
} else {
return fmt.Errorf("open %q as directory: %w", target, err)
}
}
defer unix.Close(fd) //nolint:errcheck
@ -25,8 +38,6 @@ func trySyncMount(target string, printer func(string, ...any)) error {
return fmt.Errorf("SYS_SYNCFS %q: %w", target, err)
}
printer("syncfs(%s) ok", target)
return nil
}
@ -34,7 +45,7 @@ func unmountLoop(ctx context.Context, printer func(string, ...any), target strin
errCh := make(chan error, 1)
// we need to try to sync fs before
if err := trySyncMount(target, printer); err != nil {
if err := trySyncMount(target); err != nil {
printer("sync failed: %s", err)
}
@ -103,8 +114,6 @@ func SafeUnmount(ctx context.Context, printer func(string, ...any), target strin
return err
}
logSubmounts(printer, target)
printer("unmounting %s with force", target)
ok, err = unmountLoop(ctx, printer, target, unix.MNT_FORCE, unmountForceTimeout, " with force flag")
@ -113,8 +122,6 @@ func SafeUnmount(ctx context.Context, printer func(string, ...any), target strin
return err
}
logSubmounts(printer, target)
return fmt.Errorf("unmounting %s with force flag timed out", target)
}
@ -139,3 +146,109 @@ func logSubmounts(printer func(string, ...any), target string) {
printer("submounts on %s: %v", target, submounts)
}
}
// logMountUsers scans /proc to find processes that have open file descriptors,
// working directories, or memory-mapped files under the given mount target.
// This helps diagnose "device or resource busy" errors during unmount.
//
//nolint:gocyclo,cyclop
func logMountUsers(printer func(string, ...any), target string) {
entries, err := os.ReadDir("/proc")
if err != nil {
printer("failed to read /proc: %v", err)
return
}
targetWithSlash := target + "/"
found := false
for _, entry := range entries {
if !entry.IsDir() {
continue
}
pid, err := strconv.Atoi(entry.Name())
if err != nil {
continue // not a PID directory
}
procPath := filepath.Join("/proc", entry.Name())
var offendingPaths []string
// Check cwd.
if cwd, err := os.Readlink(filepath.Join(procPath, "cwd")); err == nil {
if cwd == target || strings.HasPrefix(cwd, targetWithSlash) {
offendingPaths = append(offendingPaths, "cwd="+cwd)
}
}
// Check root.
if root, err := os.Readlink(filepath.Join(procPath, "root")); err == nil {
if root == target || strings.HasPrefix(root, targetWithSlash) {
offendingPaths = append(offendingPaths, "root="+root)
}
}
// Check open file descriptors.
if fds, err := os.ReadDir(filepath.Join(procPath, "fd")); err == nil {
for _, fd := range fds {
if link, err := os.Readlink(filepath.Join(procPath, "fd", fd.Name())); err == nil {
if link == target || strings.HasPrefix(link, targetWithSlash) {
offendingPaths = append(offendingPaths, "fd/"+fd.Name()+"="+link)
}
}
}
}
// Check memory-mapped files.
if f, err := os.Open(filepath.Join(procPath, "maps")); err == nil {
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
// maps format: address perms offset dev inode pathname
// pathname starts after the 5th field
fields := strings.Fields(line)
if len(fields) >= 6 {
mappedPath := fields[len(fields)-1]
if mappedPath == target || strings.HasPrefix(mappedPath, targetWithSlash) {
offendingPaths = append(offendingPaths, "mmap="+mappedPath)
}
}
}
f.Close() //nolint:errcheck
}
if len(offendingPaths) == 0 {
continue
}
found = true
// Read process identity.
comm := "<unknown>"
if data, err := os.ReadFile(filepath.Join(procPath, "comm")); err == nil {
comm = strings.TrimSpace(string(data))
}
cmdline := ""
if data, err := os.ReadFile(filepath.Join(procPath, "cmdline")); err == nil {
// cmdline uses null bytes as separators
cmdline = strings.ReplaceAll(strings.TrimRight(string(data), "\x00"), "\x00", " ")
}
printer("mount %s held by pid %d (%s) cmdline=[%s]: %s",
target, pid, comm, cmdline, strings.Join(offendingPaths, ", "))
}
if !found {
printer("mount %s is busy but no processes found holding it (may be held by kernel references)", target)
} else {
printer("if you see this message, report a bug with the above information to help us identify and fix the issue")
}
}

View File

@ -908,6 +908,9 @@ const (
// BootTimeout is the timeout to run all services.
BootTimeout = 70 * time.Minute
// EmergencyCleanupTimeout is the timeout to wait for the volume cleanup in case of terminal failure during booting.
EmergencyCleanupTimeout = 30 * time.Second
// FailurePauseTimeout is the timeout for the sequencer failures which can be fixed by updating the machine config.
FailurePauseTimeout = 35 * time.Minute