feat: reboot via kexec

This should save a lot of BIOS/POST time when rebooting bare-metal
hardware.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-09-07 00:13:38 +03:00
parent 3de505c894
commit d0585fb6b3
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
12 changed files with 244 additions and 5 deletions

View File

@ -392,6 +392,15 @@ func create(ctx context.Context) (err error) {
)
}
if !bootloaderEnabled {
// disable kexec, as this would effectively use the bootloader
genOptions = append(genOptions,
generate.WithSysctls(map[string]string{
"kernel.kexec_load_disabled": "1",
}),
)
}
defaultInternalLB, defaultEndpoint := provisioner.GetLoadBalancers(request.Network)
if defaultInternalLB == "" {

View File

@ -16,13 +16,29 @@ preface = """\
[notes]
[notes.clouds]
title = "Hetzner, Scaleway and Upcloud"
title = "Hetzner, Scaleway, Upcloud and Vultr"
description = """\
Talos now natively supports four new cloud platforms:
* [Hetzner](https://www.hetzner.com/)
* [Scaleway](https://www.scaleway.com/en/)
* [Upcloud](https://upcloud.com/)
* [Vultr](https://www.vultr.com/)
"""
[notes.kexec]
title = "Reboots via kexec"
description = """\
Talos now reboots by default via the kexec syscall, which means the BIOS POST process is skipped.
On bare-metal hardware the BIOS POST process might take 10-15 minutes, so Talos reboots 10-15 minutes faster on bare-metal.
Kexec support can be disabled with the following change to the machine configuration:
```
machine:
sysctls:
kernel.kexec_load_disabled: "1"
```
"""
[notes.kubespan]

View File

@ -35,6 +35,8 @@ type MachineState interface {
IsInstallStaged() bool
StagedInstallImageRef() string
StagedInstallOptions() []byte
KexecPrepared(bool)
IsKexecPrepared() bool
}
// ClusterState defines the cluster state.

View File

@ -5,6 +5,7 @@
package grub
import (
"bufio"
"bytes"
"errors"
"fmt"
@ -109,6 +110,83 @@ func (g *Grub) Labels() (current, next string, err error) {
return current, next, err
}
// BootEntry describes GRUB boot entry.
type BootEntry struct {
	// Linux is the path to the kernel image.
	Linux string
	// Initrd is the path to the initramfs image.
	Initrd string
	// Cmdline is the command line passed to the kernel.
	Cmdline string
}
// Patterns for the top-level grub.cfg directives read by GetCurrentEntry,
// compiled once rather than for every scanned line.
var (
	currentEntryDefaultRe = regexp.MustCompile(`set default="(.*)"`)
	currentEntryMenuRe    = regexp.MustCompile(`menuentry "(.*)"`)
)

// GetCurrentEntry fetches current boot entry, vmlinuz/initrd path, boot args.
//
// It scans GrubConfig line by line, remembers the entry named by
// `set default="..."`, and collects the `linux` and `initrd` commands that
// follow the menuentry with that name.
//
// It returns (nil, nil) when GrubConfig does not exist, and a nil entry
// (together with any scanner error) when no kernel or initramfs path was
// found for the default entry.
//
//nolint:gocyclo
func (g *Grub) GetCurrentEntry() (*BootEntry, error) {
	f, err := os.Open(GrubConfig)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil, nil
		}

		return nil, err
	}

	defer f.Close() //nolint:errcheck

	scanner := bufio.NewScanner(f)

	entry := &BootEntry{}

	var (
		defaultEntry string
		currentEntry string
	)

	for scanner.Scan() {
		line := scanner.Text()

		// Commands inside a menuentry are indented. Match them on the
		// de-indented text and strip the keyword with TrimPrefix, so the
		// parsed value never depends on a hard-coded byte offset.
		command := strings.TrimLeft(line, " \t")

		switch {
		case strings.HasPrefix(line, "set default"):
			matches := currentEntryDefaultRe.FindStringSubmatch(line)
			if len(matches) != 2 {
				return nil, fmt.Errorf("malformed default entry: %q", line)
			}

			defaultEntry = matches[1]
		case strings.HasPrefix(line, "menuentry"):
			matches := currentEntryMenuRe.FindStringSubmatch(line)
			if len(matches) != 2 {
				return nil, fmt.Errorf("malformed menuentry: %q", line)
			}

			currentEntry = matches[1]
		case strings.HasPrefix(command, "linux "):
			// Only the default menuentry is of interest.
			if currentEntry != defaultEntry {
				continue
			}

			// First token is the kernel path, the remainder is the cmdline.
			parts := strings.SplitN(strings.TrimPrefix(command, "linux "), " ", 2)

			entry.Linux = parts[0]
			if len(parts) == 2 {
				entry.Cmdline = parts[1]
			}
		case strings.HasPrefix(command, "initrd "):
			if currentEntry != defaultEntry {
				continue
			}

			entry.Initrd = strings.TrimPrefix(command, "initrd ")
		}
	}

	// An entry without both kernel and initramfs is reported as "not found".
	if entry.Linux == "" || entry.Initrd == "" {
		return nil, scanner.Err()
	}

	return entry, scanner.Err()
}
// Install implements the Bootloader interface. It sets up grub with the
// specified kernel parameters.
//

View File

@ -168,6 +168,15 @@ func (*Sequencer) Install(r runtime.Runtime) []runtime.Phase {
).Append(
"stopEverything",
StopAllServices,
).Append(
"mountBoot",
MountBootPartition,
).Append(
"kexec",
KexecPrepare,
).Append(
"unmountBoot",
UnmountBootPartition,
).Append(
"reboot",
Reboot,
@ -423,6 +432,15 @@ func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []ru
).Append(
"stopEverything",
StopAllServices,
).Append(
"mountBoot",
MountBootPartition,
).Append(
"kexec",
KexecPrepare,
).Append(
"unmountBoot",
UnmountBootPartition,
).Append(
"reboot",
Reboot,
@ -459,6 +477,15 @@ func stopAllPhaselist(r runtime.Runtime) PhaseList {
"unmountSystem",
UnmountEphemeralPartition,
UnmountStatePartition,
).Append(
"mountBoot",
MountBootPartition,
).Append(
"kexec",
KexecPrepare,
).Append(
"unmountBoot",
UnmountBootPartition,
)
}

View File

@ -1424,11 +1424,17 @@ func UpdateBootloader(seq runtime.Sequence, data interface{}) (runtime.TaskExecu
// Reboot represents the Reboot task.
func Reboot(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {
rebootCmd := unix.LINUX_REBOOT_CMD_RESTART
if r.State().Machine().IsKexecPrepared() {
rebootCmd = unix.LINUX_REBOOT_CMD_KEXEC
}
r.Events().Publish(&machineapi.RestartEvent{
Cmd: unix.LINUX_REBOOT_CMD_RESTART,
Cmd: int64(rebootCmd),
})
return runtime.RebootError{Cmd: unix.LINUX_REBOOT_CMD_RESTART}
return runtime.RebootError{Cmd: rebootCmd}
}, "reboot"
}
@ -1710,3 +1716,70 @@ func ActivateLogicalVolumes(seq runtime.Sequence, data interface{}) (runtime.Tas
return nil
}, "activateLogicalVolumes"
}
// KexecPrepare loads next boot kernel via kexec_file_load.
//
// The kernel image, initramfs and command line are taken from the current
// GRUB boot entry, with the image paths resolved under
// constants.BootMountPoint. The task does nothing when there is no machine
// configuration or no boot entry is found.
//
// When the load is rejected with ENOSYS or EPERM, the reason is logged and
// the task succeeds without marking kexec as prepared. Any other load
// failure is returned as an error. On success the machine state is flagged
// as kexec-prepared.
func KexecPrepare(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
	return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
		if r.Config() == nil {
			return nil
		}

		disk, err := r.Config().Machine().Install().Disk()
		if err != nil {
			return fmt.Errorf("error resolving install disk for kexec: %w", err)
		}

		// Named bootloader (not grub) so the grub package is not shadowed.
		bootloader := &grub.Grub{
			BootDisk: disk,
		}

		entry, err := bootloader.GetCurrentEntry()
		if err != nil {
			return fmt.Errorf("error reading current boot entry for kexec: %w", err)
		}

		if entry == nil {
			return nil
		}

		kernelPath := filepath.Join(constants.BootMountPoint, entry.Linux)
		initrdPath := filepath.Join(constants.BootMountPoint, entry.Initrd)

		kernel, err := os.Open(kernelPath)
		if err != nil {
			return err
		}

		defer kernel.Close() //nolint:errcheck

		initrd, err := os.Open(initrdPath)
		if err != nil {
			return err
		}

		defer initrd.Close() //nolint:errcheck

		cmdline := strings.TrimSpace(entry.Cmdline)

		if err = unix.KexecFileLoad(int(kernel.Fd()), int(initrd.Fd()), cmdline, 0); err != nil {
			switch {
			case errors.Is(err, unix.ENOSYS):
				// Not a failure: the reboot simply proceeds without kexec.
				logger.Printf("kexec support is disabled in the kernel")

				return nil
			case errors.Is(err, unix.EPERM):
				logger.Printf("kexec support is disabled via sysctl")

				return nil
			default:
				return fmt.Errorf("error loading kernel for kexec: %w", err)
			}
		}

		logger.Printf("prepared kexec environment kernel=%q initrd=%q cmdline=%q", kernelPath, initrdPath, cmdline)

		r.State().Machine().KexecPrepared(true)

		return nil
	}, "kexecPrepare"
}

View File

@ -37,6 +37,8 @@ type MachineState struct {
stagedInstall bool
stagedInstallImageRef string
stagedInstallOptions []byte
kexecPrepared bool
}
// ClusterState represents the cluster's state.
@ -182,13 +184,15 @@ func (s *MachineState) Disk(options ...disk.Option) *probe.ProbedBlockDevice {
func (s *MachineState) Close() error {
var result *multierror.Error
for _, disk := range s.disks {
for label, disk := range s.disks {
if err := disk.Close(); err != nil {
e := multierror.Append(result, err)
if e != nil {
return e
}
}
delete(s.disks, label)
}
return result.ErrorOrNil()
@ -215,3 +219,13 @@ func (s *MachineState) StagedInstallImageRef() string {
func (s *MachineState) StagedInstallOptions() []byte {
return s.stagedInstallOptions
}
// KexecPrepared implements the machine state interface.
//
// It records, in the kexecPrepared field, whether a kernel has been loaded
// for kexec; the stored value is what IsKexecPrepared later returns.
func (s *MachineState) KexecPrepared(prepared bool) {
s.kexecPrepared = prepared
}
// IsKexecPrepared implements the machine state interface.
//
// It returns the flag last stored via KexecPrepared, or false if it was
// never set.
func (s *MachineState) IsKexecPrepared() bool {
return s.kexecPrepared
}

View File

@ -81,6 +81,7 @@ type Input struct {
RegistryConfig map[string]*v1alpha1.RegistryConfig
MachineDisks []*v1alpha1.MachineDisk
SystemDiskEncryptionConfig *v1alpha1.SystemDiskEncryptionConfig
Sysctls map[string]string
Debug bool
Persist bool
@ -488,6 +489,7 @@ func NewInput(clustername, endpoint, kubernetesVersion string, secrets *SecretsB
CNIConfig: options.CNIConfig,
RegistryMirrors: options.RegistryMirrors,
RegistryConfig: options.RegistryConfig,
Sysctls: options.Sysctls,
Debug: options.Debug,
Persist: options.Persist,
AllowSchedulingOnMasters: options.AllowSchedulingOnMasters,

View File

@ -51,6 +51,7 @@ func initUd(in *Input) (*v1alpha1.Config, error) {
},
MachineDisks: in.MachineDisks,
MachineSystemDiskEncryption: in.SystemDiskEncryptionConfig,
MachineSysctls: in.Sysctls,
MachineFeatures: &v1alpha1.FeaturesConfig{},
}

View File

@ -212,6 +212,21 @@ func WithClusterDiscovery() GenOption {
}
}
// WithSysctls returns a GenOption which merges params into the sysctls already
// present in the generate options; a key given in params replaces an existing
// value for the same key.
func WithSysctls(params map[string]string) GenOption {
	return func(o *GenOptions) error {
		merged := o.Sysctls
		if merged == nil {
			// Allocate lazily: writing to a nil map would panic.
			merged = make(map[string]string)
		}

		for name, value := range params {
			merged[name] = value
		}

		o.Sysctls = merged

		return nil
	}
}
// GenOptions describes generate parameters.
type GenOptions struct {
EndpointList []string
@ -223,6 +238,7 @@ type GenOptions struct {
CNIConfig *v1alpha1.CNIConfig
RegistryMirrors map[string]*v1alpha1.RegistryMirrorConfig
RegistryConfig map[string]*v1alpha1.RegistryConfig
Sysctls map[string]string
DNSDomain string
Debug bool
Persist bool

View File

@ -51,6 +51,7 @@ func workerUd(in *Input) (*v1alpha1.Config, error) {
},
MachineDisks: in.MachineDisks,
MachineSystemDiskEncryption: in.SystemDiskEncryptionConfig,
MachineSysctls: in.Sysctls,
MachineFeatures: &v1alpha1.FeaturesConfig{},
}

View File

@ -11,7 +11,7 @@ weight: 6
| Kubernetes | 1.22, 1.21, 1.20 | 1.22, 1.21, 1.20 |
| Architecture | amd64, arm64 |
| **Platforms** | | |
| - cloud | AWS, GCP, Azure, Digital Ocean, OpenStack |
| - cloud | AWS, GCP, Azure, Digital Ocean, Hetzner, OpenStack, Scaleway, Vultr, Upcloud | AWS, GCP, Azure, Digital Ocean, OpenStack |
| - bare metal | x86: BIOS, UEFI; arm64: UEFI; boot: ISO, PXE, disk image |
| - virtualized | VMware, Hyper-V, KVM, Proxmox, Xen |
| - SBCs | Raspberry Pi4, Banana Pi M64, Pine64, and other |