diff --git a/cmd/talosctl/cmd/mgmt/cluster/create.go b/cmd/talosctl/cmd/mgmt/cluster/create.go index 23736dd87..52f13a056 100644 --- a/cmd/talosctl/cmd/mgmt/cluster/create.go +++ b/cmd/talosctl/cmd/mgmt/cluster/create.go @@ -392,6 +392,15 @@ func create(ctx context.Context) (err error) { ) } + if !bootloaderEnabled { + // disable kexec, as this would effectively use the bootloader + genOptions = append(genOptions, + generate.WithSysctls(map[string]string{ + "kernel.kexec_load_disabled": "1", + }), + ) + } + defaultInternalLB, defaultEndpoint := provisioner.GetLoadBalancers(request.Network) if defaultInternalLB == "" { diff --git a/hack/release.toml b/hack/release.toml index 5c28b52de..3ba829cf0 100644 --- a/hack/release.toml +++ b/hack/release.toml @@ -16,13 +16,29 @@ preface = """\ [notes] [notes.clouds] - title = "Hetzner, Scaleway and Upcloud" + title = "Hetzner, Scaleway, Upcloud and Vultr" description = """\ -Talos now natively supports three new cloud platforms: +Talos now natively supports four new cloud platforms: * [Hetzner](https://www.hetzner.com/) * [Scaleway](https://www.scaleway.com/en/) * [Upcloud](https://upcloud.com/) +* [Vultr](https://www.vultr.com/) +""" + + [notes.kexec] + title = "Reboots via kexec" + description = """\ +Talos now reboots by default via the kexec syscall, which means the BIOS POST process is skipped. +On bare-metal hardware the BIOS POST process might take 10-15 minutes, so Talos reboots 10-15 minutes faster on bare-metal. 
+ +Kexec support can be disabled with the following change to the machine configuration: + +``` +machine: + sysctls: + kernel.kexec_load_disabled: "1" +``` """ [notes.kubespan] diff --git a/internal/app/machined/pkg/runtime/state.go b/internal/app/machined/pkg/runtime/state.go index 515b2e379..3ed16528d 100644 --- a/internal/app/machined/pkg/runtime/state.go +++ b/internal/app/machined/pkg/runtime/state.go @@ -35,6 +35,8 @@ type MachineState interface { IsInstallStaged() bool StagedInstallImageRef() string StagedInstallOptions() []byte + KexecPrepared(bool) + IsKexecPrepared() bool } // ClusterState defines the cluster state. diff --git a/internal/app/machined/pkg/runtime/v1alpha1/bootloader/grub/grub.go b/internal/app/machined/pkg/runtime/v1alpha1/bootloader/grub/grub.go index 45cefdb75..649e5ce20 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/bootloader/grub/grub.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/bootloader/grub/grub.go @@ -5,6 +5,7 @@ package grub import ( + "bufio" "bytes" "errors" "fmt" @@ -109,6 +110,83 @@ func (g *Grub) Labels() (current, next string, err error) { return current, next, err } +// BootEntry describes a GRUB boot entry. +type BootEntry struct { + // Paths to the kernel and initramfs images. + Linux, Initrd string + // Cmdline for the kernel. + Cmdline string +} + +// GetCurrentEntry fetches the current (default) boot entry: vmlinuz/initrd paths and boot args. 
+// +//nolint:gocyclo +func (g *Grub) GetCurrentEntry() (*BootEntry, error) { + f, err := os.Open(GrubConfig) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + + return nil, err + } + + defer f.Close() //nolint:errcheck + + scanner := bufio.NewScanner(f) + + entry := &BootEntry{} + + var ( + defaultEntry string + currentEntry string + ) + + for scanner.Scan() { + line := scanner.Text() + + switch { + case strings.HasPrefix(line, "set default"): + matches := regexp.MustCompile(`set default="(.*)"`).FindStringSubmatch(line) + if len(matches) != 2 { + return nil, fmt.Errorf("malformed default entry: %q", line) + } + + defaultEntry = matches[1] + case strings.HasPrefix(line, "menuentry"): + matches := regexp.MustCompile(`menuentry "(.*)"`).FindStringSubmatch(line) + if len(matches) != 2 { + return nil, fmt.Errorf("malformed menuentry: %q", line) + } + + currentEntry = matches[1] + case strings.HasPrefix(line, " linux "): + if currentEntry != defaultEntry { + continue + } + + parts := strings.SplitN(line[8:], " ", 2) + + entry.Linux = parts[0] + if len(parts) == 2 { + entry.Cmdline = parts[1] + } + case strings.HasPrefix(line, " initrd "): + if currentEntry != defaultEntry { + continue + } + + entry.Initrd = line[9:] + } + } + + if entry.Linux == "" || entry.Initrd == "" { + return nil, scanner.Err() + } + + return entry, scanner.Err() +} + // Install implements the Bootloader interface. It sets up grub with the // specified kernel parameters. 
// diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go index 0b88fdcc2..67664cc1a 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go @@ -168,6 +168,15 @@ func (*Sequencer) Install(r runtime.Runtime) []runtime.Phase { ).Append( "stopEverything", StopAllServices, + ).Append( + "mountBoot", + MountBootPartition, + ).Append( + "kexec", + KexecPrepare, + ).Append( + "unmountBoot", + UnmountBootPartition, ).Append( "reboot", Reboot, @@ -423,6 +432,15 @@ func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []ru ).Append( "stopEverything", StopAllServices, + ).Append( + "mountBoot", + MountBootPartition, + ).Append( + "kexec", + KexecPrepare, + ).Append( + "unmountBoot", + UnmountBootPartition, ).Append( "reboot", Reboot, @@ -459,6 +477,15 @@ func stopAllPhaselist(r runtime.Runtime) PhaseList { "unmountSystem", UnmountEphemeralPartition, UnmountStatePartition, + ).Append( + "mountBoot", + MountBootPartition, + ).Append( + "kexec", + KexecPrepare, + ).Append( + "unmountBoot", + UnmountBootPartition, ) } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go index 188f0782d..af58f795f 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go @@ -1424,11 +1424,17 @@ func UpdateBootloader(seq runtime.Sequence, data interface{}) (runtime.TaskExecu // Reboot represents the Reboot task. 
func Reboot(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { + rebootCmd := unix.LINUX_REBOOT_CMD_RESTART + + if r.State().Machine().IsKexecPrepared() { + rebootCmd = unix.LINUX_REBOOT_CMD_KEXEC + } + r.Events().Publish(&machineapi.RestartEvent{ - Cmd: unix.LINUX_REBOOT_CMD_RESTART, + Cmd: int64(rebootCmd), }) - return runtime.RebootError{Cmd: unix.LINUX_REBOOT_CMD_RESTART} + return runtime.RebootError{Cmd: rebootCmd} }, "reboot" } @@ -1710,3 +1716,70 @@ func ActivateLogicalVolumes(seq runtime.Sequence, data interface{}) (runtime.Tas return nil }, "activateLogicalVolumes" } + +// KexecPrepare loads next boot kernel via kexec_file_load. +func KexecPrepare(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) { + return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error { + if r.Config() == nil { + return nil + } + + disk, err := r.Config().Machine().Install().Disk() + if err != nil { + return err + } + + grub := &grub.Grub{ + BootDisk: disk, + } + + entry, err := grub.GetCurrentEntry() + if err != nil { + return err + } + + if entry == nil { + return nil + } + + kernelPath := filepath.Join(constants.BootMountPoint, entry.Linux) + initrdPath := filepath.Join(constants.BootMountPoint, entry.Initrd) + + kernel, err := os.Open(kernelPath) + if err != nil { + return err + } + + defer kernel.Close() //nolint:errcheck + + initrd, err := os.Open(initrdPath) + if err != nil { + return err + } + + defer initrd.Close() //nolint:errcheck + + cmdline := strings.TrimSpace(entry.Cmdline) + + if err = unix.KexecFileLoad(int(kernel.Fd()), int(initrd.Fd()), cmdline, 0); err != nil { + switch { + case errors.Is(err, unix.ENOSYS): + log.Printf("kexec support is disabled in the kernel") + + return nil + case errors.Is(err, unix.EPERM): + log.Printf("kexec support is disabled via sysctl") + + return nil + default: + return 
fmt.Errorf("error loading kernel for kexec: %w", err) + } + } + + log.Printf("prepared kexec environment kernel=%q initrd=%q cmdline=%q", kernelPath, initrdPath, cmdline) + + r.State().Machine().KexecPrepared(true) + + return nil + }, "kexecPrepare" +} diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_state.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_state.go index 596237f46..a88d75358 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_state.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_state.go @@ -37,6 +37,8 @@ type MachineState struct { stagedInstall bool stagedInstallImageRef string stagedInstallOptions []byte + + kexecPrepared bool } // ClusterState represents the cluster's state. @@ -182,13 +184,15 @@ func (s *MachineState) Disk(options ...disk.Option) *probe.ProbedBlockDevice { func (s *MachineState) Close() error { var result *multierror.Error - for _, disk := range s.disks { + for label, disk := range s.disks { if err := disk.Close(); err != nil { e := multierror.Append(result, err) if e != nil { return e } } + + delete(s.disks, label) } return result.ErrorOrNil() @@ -215,3 +219,13 @@ func (s *MachineState) StagedInstallImageRef() string { func (s *MachineState) StagedInstallOptions() []byte { return s.stagedInstallOptions } + +// KexecPrepared implements the machine state interface. +func (s *MachineState) KexecPrepared(prepared bool) { + s.kexecPrepared = prepared +} + +// IsKexecPrepared implements the machine state interface. 
+func (s *MachineState) IsKexecPrepared() bool { + return s.kexecPrepared +} diff --git a/pkg/machinery/config/types/v1alpha1/generate/generate.go b/pkg/machinery/config/types/v1alpha1/generate/generate.go index 6e67db132..7de8e61f3 100644 --- a/pkg/machinery/config/types/v1alpha1/generate/generate.go +++ b/pkg/machinery/config/types/v1alpha1/generate/generate.go @@ -81,6 +81,7 @@ type Input struct { RegistryConfig map[string]*v1alpha1.RegistryConfig MachineDisks []*v1alpha1.MachineDisk SystemDiskEncryptionConfig *v1alpha1.SystemDiskEncryptionConfig + Sysctls map[string]string Debug bool Persist bool @@ -488,6 +489,7 @@ func NewInput(clustername, endpoint, kubernetesVersion string, secrets *SecretsB CNIConfig: options.CNIConfig, RegistryMirrors: options.RegistryMirrors, RegistryConfig: options.RegistryConfig, + Sysctls: options.Sysctls, Debug: options.Debug, Persist: options.Persist, AllowSchedulingOnMasters: options.AllowSchedulingOnMasters, diff --git a/pkg/machinery/config/types/v1alpha1/generate/init.go b/pkg/machinery/config/types/v1alpha1/generate/init.go index 9f6db61aa..b1418a2b3 100644 --- a/pkg/machinery/config/types/v1alpha1/generate/init.go +++ b/pkg/machinery/config/types/v1alpha1/generate/init.go @@ -51,6 +51,7 @@ func initUd(in *Input) (*v1alpha1.Config, error) { }, MachineDisks: in.MachineDisks, MachineSystemDiskEncryption: in.SystemDiskEncryptionConfig, + MachineSysctls: in.Sysctls, MachineFeatures: &v1alpha1.FeaturesConfig{}, } diff --git a/pkg/machinery/config/types/v1alpha1/generate/options.go b/pkg/machinery/config/types/v1alpha1/generate/options.go index af45a8e83..b4d642b44 100644 --- a/pkg/machinery/config/types/v1alpha1/generate/options.go +++ b/pkg/machinery/config/types/v1alpha1/generate/options.go @@ -212,6 +212,21 @@ func WithClusterDiscovery() GenOption { } } +// WithSysctls merges list of sysctls with new values. 
+func WithSysctls(params map[string]string) GenOption { + return func(o *GenOptions) error { + if o.Sysctls == nil { + o.Sysctls = make(map[string]string) + } + + for k, v := range params { + o.Sysctls[k] = v + } + + return nil + } +} + // GenOptions describes generate parameters. type GenOptions struct { EndpointList []string @@ -223,6 +238,7 @@ type GenOptions struct { CNIConfig *v1alpha1.CNIConfig RegistryMirrors map[string]*v1alpha1.RegistryMirrorConfig RegistryConfig map[string]*v1alpha1.RegistryConfig + Sysctls map[string]string DNSDomain string Debug bool Persist bool diff --git a/pkg/machinery/config/types/v1alpha1/generate/worker.go b/pkg/machinery/config/types/v1alpha1/generate/worker.go index 0645bad45..c82009c84 100644 --- a/pkg/machinery/config/types/v1alpha1/generate/worker.go +++ b/pkg/machinery/config/types/v1alpha1/generate/worker.go @@ -51,6 +51,7 @@ func workerUd(in *Input) (*v1alpha1.Config, error) { }, MachineDisks: in.MachineDisks, MachineSystemDiskEncryption: in.SystemDiskEncryptionConfig, + MachineSysctls: in.Sysctls, MachineFeatures: &v1alpha1.FeaturesConfig{}, } diff --git a/website/content/docs/v0.13/Introduction/support-matrix.md b/website/content/docs/v0.13/Introduction/support-matrix.md index 0e5fe96a5..b6bf8c0e7 100644 --- a/website/content/docs/v0.13/Introduction/support-matrix.md +++ b/website/content/docs/v0.13/Introduction/support-matrix.md @@ -11,7 +11,7 @@ weight: 6 | Kubernetes | 1.22, 1.21, 1.20 | 1.22, 1.21, 1.20 | | Architecture | amd64, arm64 | | **Platforms** | | | -| - cloud | AWS, GCP, Azure, Digital Ocean, OpenStack | +| - cloud | AWS, GCP, Azure, Digital Ocean, Hetzner, OpenStack, Scaleway, Vultr, Upcloud | AWS, GCP, Azure, Digital Ocean, OpenStack | | - bare metal | x86: BIOS, UEFI; arm64: UEFI; boot: ISO, PXE, disk image | | - virtualized | VMware, Hyper-V, KVM, Proxmox, Xen | | - SBCs | Raspberry Pi4, Banana Pi M64, Pine64, and other |