feat: provide a way to configure IPMI PXE method

Fixes #274

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-12-09 00:14:18 +03:00
parent 2ff14c4528
commit 4cfdedaf97
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
10 changed files with 106 additions and 61 deletions

View File

@ -69,6 +69,7 @@ spec:
- --insecure-wipe=${SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE:=true}
- --auto-bmc-setup=${SIDERO_CONTROLLER_MANAGER_AUTO_BMC_SETUP:=true}
- --server-reboot-timeout=${SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT:=20m}
- --ipmi-pxe-method=${SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD:=uefi}
- --test-power-simulated-explicit-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_EXPLICIT_FAILURE:=0}
- --test-power-simulated-silent-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_SILENT_FAILURE:=0}
image: controller:latest

View File

@ -32,6 +32,7 @@ import (
infrav1 "github.com/talos-systems/sidero/app/caps-controller-manager/api/v1alpha3"
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)
@ -49,6 +50,7 @@ type ServerReconciler struct {
Recorder record.EventRecorder
RebootTimeout time.Duration
PXEMode metal.PXEMode
}
// +kubebuilder:rbac:groups=metal.sidero.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete
@ -79,7 +81,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return ctrl.Result{}, err
}
mgmtClient, err := metal.NewManagementClient(ctx, r.Client, &s.Spec)
mgmtClient, err := power.NewManagementClient(ctx, r.Client, &s.Spec)
if err != nil {
log.Error(err, "failed to create management client")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to initialize management client: %s.", err))
@ -196,7 +198,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
if !poweredOn {
// it's safe to set server to PXE boot even if it's already installed, as PXE server makes sure server is PXE booted only once
err = mgmtClient.SetPXE()
err = mgmtClient.SetPXE(r.PXEMode)
if err != nil {
log.Error(err, "failed to set PXE")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to set to PXE boot once: %s.", err))
@ -239,7 +241,7 @@ func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return f(false, ctrl.Result{RequeueAfter: constants.DefaultRequeueAfter})
}
err = mgmtClient.SetPXE()
err = mgmtClient.SetPXE(r.PXEMode)
if err != nil {
log.Error(err, "failed to set PXE")
r.Recorder.Event(serverRef, corev1.EventTypeWarning, "Server Management", fmt.Sprintf("Failed to set to PXE boot once: %s.", err))

View File

@ -15,6 +15,7 @@ import (
"time"
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
)
// Client provides management over simple API.
@ -88,7 +89,8 @@ func (c *Client) PowerCycle() error {
}
// SetPXE makes sure the node will pxe boot next time.
func (c *Client) SetPXE() error {
func (c *Client) SetPXE(mode metal.PXEMode) error {
// mode is accepted but deliberately unused here: there is no way to enforce
// a PXE mode via the QEMU API, so the same "/pxeboot" request is posted for every mode.
return c.postRequest("/pxeboot")
}

View File

@ -0,0 +1,56 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package power provides common interface to manage power state.
package power
import (
"context"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/ipmi"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)
// NewManagementClient picks the ManagementClient implementation for a server spec.
//
// A spec with BMC settings gets an IPMI client, a spec with only ManagementAPI
// settings gets an API client, and a spec with neither gets the fake client.
// Missing BMC user/password values are resolved via their UserFrom/PassFrom
// sources, and an empty interface or zero port is replaced with a default.
func NewManagementClient(ctx context.Context, client client.Client, spec *v1alpha1.ServerSpec) (metal.ManagementClient, error) {
	if spec.BMC == nil {
		if spec.ManagementAPI == nil {
			return fakeClient{}, nil
		}

		return api.NewClient(*spec.ManagementAPI)
	}

	// Fill in credentials and defaults on a copy, so the fields of spec.BMC
	// itself are never overwritten.
	bmc := *spec.BMC

	if bmc.User == "" {
		user, err := bmc.UserFrom.Resolve(ctx, client)
		if err != nil {
			return nil, err
		}

		bmc.User = user
	}

	if bmc.Pass == "" {
		pass, err := bmc.PassFrom.Resolve(ctx, client)
		if err != nil {
			return nil, err
		}

		bmc.Pass = pass
	}

	if bmc.Interface == "" {
		bmc.Interface = "lanplus"
	}

	if bmc.Port == 0 {
		bmc.Port = constants.DefaultBMCPort
	}

	return ipmi.NewClient(bmc)
}

View File

@ -2,7 +2,9 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package metal
package power
import "github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
type fakeClient struct{}
@ -18,7 +20,7 @@ func (fakeClient) PowerCycle() error {
return nil
}
func (fakeClient) SetPXE() error {
func (fakeClient) SetPXE(mode metal.PXEMode) error {
// Nothing to configure on a fake client: mode is ignored and nil is always returned.
return nil
}

View File

@ -10,6 +10,7 @@ import (
goipmi "github.com/pensando/goipmi"
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
)
// Link to the IPMI spec: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf
@ -92,8 +93,15 @@ func (c *Client) Status() (*goipmi.ChassisStatusResponse, error) {
}
// SetPXE makes sure the node will pxe boot next time.
func (c *Client) SetPXE() error {
return c.IPMIClient.SetBootDeviceEFI(goipmi.BootDevicePxe)
func (c *Client) SetPXE(mode metal.PXEMode) error {
	// BIOS mode goes through the plain boot-device call.
	if mode == metal.PXEModeBIOS {
		return c.IPMIClient.SetBootDevice(goipmi.BootDevicePxe)
	}

	// UEFI mode goes through the EFI variant of the boot-device call.
	if mode == metal.PXEModeUEFI {
		return c.IPMIClient.SetBootDeviceEFI(goipmi.BootDevicePxe)
	}

	// Any other value (including the empty string) is rejected.
	return fmt.Errorf("unsupported mode %q", mode)
}
// GetLANConfig fetches a given param from the LAN Config. (see 23.2).

View File

@ -5,62 +5,32 @@
// Package metal provides interfaces to manage metal machines.
package metal
import (
"context"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/ipmi"
"github.com/talos-systems/sidero/app/sidero-controller-manager/pkg/constants"
)
// ManagementClient controls power and boot order of a metal machine.
type ManagementClient interface {
PowerOn() error
PowerOff() error
PowerCycle() error
IsPoweredOn() (bool, error)
// SetPXE makes sure the machine will PXE boot next time; implementations
// that cannot enforce a PXE mode may ignore the mode argument.
SetPXE() error
SetPXE(mode PXEMode) error
IsFake() bool
Close() error
}
// NewManagementClient builds ManagementClient from the server spec.
func NewManagementClient(ctx context.Context, client client.Client, spec *v1alpha1.ServerSpec) (ManagementClient, error) {
switch {
case spec.BMC != nil:
var err error
// PXEMode specifies PXE boot mode.
type PXEMode string
bmcSpec := *spec.BMC
// Supported PXE boot modes.
//
// The constants are declared without an explicit type, so they are untyped
// string constants usable both as PXEMode values and as plain strings.
const (
PXEModeBIOS = "bios"
PXEModeUEFI = "uefi"
)
if bmcSpec.User == "" {
bmcSpec.User, err = bmcSpec.UserFrom.Resolve(ctx, client)
if err != nil {
return nil, err
}
}
if bmcSpec.Pass == "" {
bmcSpec.Pass, err = bmcSpec.PassFrom.Resolve(ctx, client)
if err != nil {
return nil, err
}
}
if bmcSpec.Interface == "" {
bmcSpec.Interface = "lanplus"
}
if bmcSpec.Port == 0 {
bmcSpec.Port = constants.DefaultBMCPort
}
return ipmi.NewClient(bmcSpec)
case spec.ManagementAPI != nil:
return api.NewClient(*spec.ManagementAPI)
// IsValid reports whether mode is one of the supported PXE boot modes
// (PXEModeBIOS or PXEModeUEFI).
func (mode PXEMode) IsValid() bool {
switch mode {
case PXEModeBIOS:
return true
case PXEModeUEFI:
return true
default:
return fakeClient{}, nil
return false
}
}

View File

@ -36,6 +36,7 @@ import (
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/ipxe"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/metadata"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/metal"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/server"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/siderolink"
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/tftp"
@ -80,6 +81,7 @@ func main() {
insecureWipe bool
autoBMCSetup bool
serverRebootTimeout time.Duration
ipmiPXEMethod string
testPowerSimulatedExplicitFailureProb float64
testPowerSimulatedSilentFailureProb float64
@ -96,6 +98,7 @@ func main() {
flag.BoolVar(&insecureWipe, "insecure-wipe", true, "Wipe head of the disk only (if false, wipe whole disk).")
flag.BoolVar(&autoBMCSetup, "auto-bmc-setup", true, "Attempt to setup BMC info automatically when agent boots.")
flag.DurationVar(&serverRebootTimeout, "server-reboot-timeout", constants.DefaultServerRebootTimeout, "Timeout to wait for the server to restart and start wipe.")
flag.StringVar(&ipmiPXEMethod, "ipmi-pxe-method", string(metal.PXEModeUEFI), fmt.Sprintf("Default method to use to set server to boot from PXE via IPMI: %s.", []string{metal.PXEModeUEFI, metal.PXEModeBIOS}))
flag.Float64Var(&testPowerSimulatedExplicitFailureProb, "test-power-simulated-explicit-failure-prob", 0, "Test failure simulation setting.")
flag.Float64Var(&testPowerSimulatedSilentFailureProb, "test-power-simulated-silent-failure-prob", 0, "Test failure simulation setting.")
@ -124,6 +127,11 @@ func main() {
}
}
if !metal.PXEMode(ipmiPXEMethod).IsValid() {
setupLog.Error(fmt.Errorf("ipmi-pxe-method is invalid"), "")
os.Exit(1)
}
ctrl.SetLogger(zap.New(func(o *zap.Options) {
o.Development = true
}))
@ -190,6 +198,7 @@ func main() {
APIReader: mgr.GetAPIReader(),
Recorder: recorder,
RebootTimeout: serverRebootTimeout,
PXEMode: metal.PXEMode(ipmiPXEMethod),
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: defaultMaxConcurrentReconciles}); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Server")
os.Exit(1)

View File

@ -15,14 +15,8 @@ preface = """\
[notes]
[notes.bmc-port]
title = "BMC Port"
[notes.ipmi-pxe-method]
title = "IPMI PXE Method"
description = """\
Sidero now supports the ability to specify the port in a server's BMC info. By default, this value will be determined by talking directly to the BMC if possible, with a fallback to port 623. The value can also simply be specified as part of editing the Server resource directly.
"""
[notes.v1alpha4]
title = "CAPI v1alpha4"
description = """\
This release of Sidero brings compatibility with CAPI v1alpha4.
The IPMI PXE method (`uefi` or `bios`) can now be configured with `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` while installing Sidero.
"""

View File

@ -24,6 +24,7 @@ variables or as variables in the `clusterctl` configuration:
- `SIDERO_CONTROLLER_MANAGER_AUTO_BMC_SETUP` (`true`): automatically attempt to configure the BMC with a `sidero` user that will be used for all IPMI tasks.
- `SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE` (`true`): wipe only the first megabyte of each disk on the server, otherwise wipe the full disk
- `SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT` (`20m`): timeout for the server reboot (how long it might take for the server to be rebooted before Sidero retries an IPMI reboot operation)
- `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` (`uefi`): method used to set the server to boot from PXE via IPMI: `uefi` for UEFI boot or `bios` for BIOS boot
- `SIDERO_CONTROLLER_MANAGER_BOOT_FROM_DISK_METHOD` (`ipxe-exit`): configures the way Sidero forces server to boot from disk when server hits iPXE server after initial install: `ipxe-exit` returns iPXE script with `exit` command, `http-404` returns HTTP 404 Not Found error, `ipxe-sanboot` uses iPXE `sanboot` command to boot from the first hard disk
Sidero provides two endpoints which should be made available to the infrastructure: