feat: introduce metal agent mode

Introduce a new operating mode called the metal agent mode.

The mode is activated by the presence of a `/usr/local/etc/is-metal-agent` file under the root FS.

In this mode, Talos will:
- Run only the Initialize sequence, without following it up with the install/boot sequences.
- Mark the STATE partition as `missing`, so that Talos always stays in the "not installed" state.
- Block applying configuration via the API while in maintenance mode (the GenerateConfiguration, Upgrade and Reset APIs are rejected as well).

This mode can be used, e.g., to collect hardware information from bare-metal servers.

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
This commit is contained in:
Utku Ozdemir 2024-10-09 23:12:46 +02:00
parent 0e15955fcc
commit 2136358d65
No known key found for this signature in database
GPG Key ID: DBD13117B0A14E93
11 changed files with 99 additions and 14 deletions

View File

@ -16,6 +16,7 @@ import (
"github.com/siderolabs/go-blockdevice/v2/encryption" "github.com/siderolabs/go-blockdevice/v2/encryption"
"go.uber.org/zap" "go.uber.org/zap"
machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/pkg/partition" "github.com/siderolabs/talos/internal/pkg/partition"
"github.com/siderolabs/talos/pkg/machinery/cel" "github.com/siderolabs/talos/pkg/machinery/cel"
"github.com/siderolabs/talos/pkg/machinery/cel/celenv" "github.com/siderolabs/talos/pkg/machinery/cel/celenv"
@ -28,8 +29,12 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/runtime" "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
) )
var noMatch = cel.MustExpression(cel.ParseBooleanExpression("false", celenv.Empty()))
// VolumeConfigController provides volume configuration based on Talos defaults and machine configuration. // VolumeConfigController provides volume configuration based on Talos defaults and machine configuration.
type VolumeConfigController struct{} type VolumeConfigController struct {
V1Alpha1Mode machinedruntime.Mode
}
// Name implements controller.Controller interface. // Name implements controller.Controller interface.
func (ctrl *VolumeConfigController) Name() string { func (ctrl *VolumeConfigController) Name() string {
@ -291,8 +296,14 @@ func (ctrl *VolumeConfigController) manageStateNoConfig(encryptionMeta *runtime.
TargetPath: constants.StateMountPoint, TargetPath: constants.StateMountPoint,
} }
match := labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel)
if ctrl.V1Alpha1Mode.IsAgent() { // mark as missing
match = noMatch
}
// the locator uses the match expression chosen above (never matches in agent mode)
vc.TypedSpec().Locator = block.LocatorSpec{ vc.TypedSpec().Locator = block.LocatorSpec{
Match: labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel), Match: match,
} }
if encryptionMeta != nil { if encryptionMeta != nil {

View File

@ -25,6 +25,7 @@ import (
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials"
machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/maintenance" "github.com/siderolabs/talos/internal/app/maintenance"
"github.com/siderolabs/talos/pkg/grpc/factory" "github.com/siderolabs/talos/pkg/grpc/factory"
"github.com/siderolabs/talos/pkg/grpc/middleware/authz" "github.com/siderolabs/talos/pkg/grpc/middleware/authz"
@ -37,6 +38,7 @@ import (
// MaintenanceServiceController runs the maintenance service based on the configuration. // MaintenanceServiceController runs the maintenance service based on the configuration.
type MaintenanceServiceController struct { type MaintenanceServiceController struct {
SiderolinkPeerCheckFunc authz.SideroLinkPeerCheckFunc SiderolinkPeerCheckFunc authz.SideroLinkPeerCheckFunc
V1Alpha1Mode machinedruntime.Mode
} }
// Name implements controller.Controller interface. // Name implements controller.Controller interface.
@ -122,7 +124,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
defer shutdownServer(context.Background()) defer shutdownServer(context.Background())
cfgCh := make(chan machineryconfig.Provider) cfgCh := make(chan machineryconfig.Provider)
srv := maintenance.New(cfgCh) srv := maintenance.New(cfgCh, ctrl.V1Alpha1Mode)
injector := &authz.Injector{ injector := &authz.Injector{
Mode: authz.ReadOnlyWithAdminOnSiderolink, Mode: authz.ReadOnlyWithAdminOnSiderolink,
@ -289,7 +291,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
lastCertificateFingerprint = fingerprint lastCertificateFingerprint = fingerprint
} }
if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" { if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" && !ctrl.V1Alpha1Mode.IsAgent() {
firstIP := reachableAddresses[0] firstIP := reachableAddresses[0]
logger.Sugar().Info("upload configuration using talosctl:") logger.Sugar().Info("upload configuration using talosctl:")

View File

@ -21,6 +21,8 @@ const (
ModeContainer ModeContainer
// ModeMetal is the metal runtime mode. // ModeMetal is the metal runtime mode.
ModeMetal ModeMetal
// ModeMetalAgent is the metal agent runtime mode.
ModeMetalAgent
) )
const ( const (
@ -37,14 +39,15 @@ const (
) )
const ( const (
cloud = "cloud" cloud = "cloud"
container = "container" container = "container"
metal = "metal" metal = "metal"
metalAgent = "metal-agent"
) )
// String returns the string representation of a Mode. // String returns the string representation of a Mode.
func (m Mode) String() string { func (m Mode) String() string {
return [...]string{cloud, container, metal}[m] return [...]string{cloud, container, metal, metalAgent}[m]
} }
// RequiresInstall implements config.RuntimeMode. // RequiresInstall implements config.RuntimeMode.
@ -62,6 +65,11 @@ func (m Mode) Supports(feature ModeCapability) bool {
return (m.capabilities() & uint64(feature)) != 0 return (m.capabilities() & uint64(feature)) != 0
} }
// IsAgent returns true if the mode is an agent mode (i.e. metal agent mode).
//
// ModeMetalAgent is the only mode reported as an agent mode.
func (m Mode) IsAgent() bool {
return m == ModeMetalAgent
}
// ParseMode returns a `Mode` that matches the specified string. // ParseMode returns a `Mode` that matches the specified string.
func ParseMode(s string) (mod Mode, err error) { func ParseMode(s string) (mod Mode, err error) {
switch s { switch s {
@ -71,6 +79,8 @@ func ParseMode(s string) (mod Mode, err error) {
mod = ModeContainer mod = ModeContainer
case metal: case metal:
mod = ModeMetal mod = ModeMetal
case metalAgent:
mod = ModeMetalAgent
default: default:
return mod, fmt.Errorf("unknown runtime mode: %q", s) return mod, fmt.Errorf("unknown runtime mode: %q", s)
} }

View File

@ -43,7 +43,9 @@ const (
) )
// Metal is a discoverer for non-cloud environments. // Metal is a discoverer for non-cloud environments.
type Metal struct{} type Metal struct {
IsAgent bool
}
// Name implements the platform.Platform interface. // Name implements the platform.Platform interface.
func (m *Metal) Name() string { func (m *Metal) Name() string {
@ -118,6 +120,10 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error
// Mode implements the platform.Platform interface. // Mode implements the platform.Platform interface.
func (m *Metal) Mode() runtime.Mode { func (m *Metal) Mode() runtime.Mode {
if m.IsAgent {
return runtime.ModeMetalAgent
}
return runtime.ModeMetal return runtime.ModeMetal
} }

View File

@ -113,7 +113,11 @@ func newPlatform(platform string) (p runtime.Platform, err error) {
case "hcloud": case "hcloud":
p = &hcloud.Hcloud{} p = &hcloud.Hcloud{}
case constants.PlatformMetal: case constants.PlatformMetal:
p = &metal.Metal{} _, metalAgentCheckErr := os.Stat(constants.MetalAgentModeFlagPath)
p = &metal.Metal{
IsAgent: metalAgentCheckErr == nil,
}
case "opennebula": case "opennebula":
p = &opennebula.OpenNebula{} p = &opennebula.OpenNebula{}
case "openstack": case "openstack":

View File

@ -65,9 +65,12 @@ func (p PhaseList) AppendList(list PhaseList) PhaseList {
// Initialize is the initialize sequence. The primary goals of this sequence is // Initialize is the initialize sequence. The primary goals of this sequence is
// to load the config and enforce kernel security requirements. // to load the config and enforce kernel security requirements.
func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase { func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
mode := r.State().Platform().Mode()
phases := PhaseList{} phases := PhaseList{}
switch r.State().Platform().Mode() { //nolint:exhaustive phases = phases.Append("logMode", LogMode)
switch mode { //nolint:exhaustive
case runtime.ModeContainer: case runtime.ModeContainer:
phases = phases.Append( phases = phases.Append(
"systemRequirements", "systemRequirements",
@ -118,6 +121,10 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
ReloadMeta, ReloadMeta,
).AppendWithDeferredCheck( ).AppendWithDeferredCheck(
func() bool { func() bool {
if mode == runtime.ModeMetalAgent {
return false
}
disabledStr := procfs.ProcCmdline().Get(constants.KernelParamDashboardDisabled).First() disabledStr := procfs.ProcCmdline().Get(constants.KernelParamDashboardDisabled).First()
disabled, _ := strconv.ParseBool(pointer.SafeDeref(disabledStr)) //nolint:errcheck disabled, _ := strconv.ParseBool(pointer.SafeDeref(disabledStr)) //nolint:errcheck

View File

@ -120,6 +120,15 @@ func WaitForUSB(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
}, "waitForUSB" }, "waitForUSB"
} }
// LogMode represents the LogMode task.
//
// The returned task logs the mode reported by the runtime's platform and
// always returns nil; the task name is "logMode".
func LogMode(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
logger.Printf("running in mode: %s", r.State().Platform().Mode())
return nil
}, "logMode"
}
// EnforceKSPPRequirements represents the EnforceKSPPRequirements task. // EnforceKSPPRequirements represents the EnforceKSPPRequirements task.
func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) { func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) { return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {

View File

@ -96,7 +96,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
&block.LVMActivationController{}, &block.LVMActivationController{},
&block.SystemDiskController{}, &block.SystemDiskController{},
&block.UserDiskConfigController{}, &block.UserDiskConfigController{},
&block.VolumeConfigController{}, &block.VolumeConfigController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&block.VolumeManagerController{}, &block.VolumeManagerController{},
&cluster.AffiliateMergeController{}, &cluster.AffiliateMergeController{},
cluster.NewConfigController(), cluster.NewConfigController(),
@ -314,7 +316,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
Drainer: drainer, Drainer: drainer,
}, },
&runtimecontrollers.MaintenanceConfigController{}, &runtimecontrollers.MaintenanceConfigController{},
&runtimecontrollers.MaintenanceServiceController{}, &runtimecontrollers.MaintenanceServiceController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&runtimecontrollers.MachineStatusController{ &runtimecontrollers.MachineStatusController{
V1Alpha1Events: ctrl.v1alpha1Runtime.Events(), V1Alpha1Events: ctrl.v1alpha1Runtime.Events(),
}, },

View File

@ -44,10 +44,12 @@ type Server struct {
controller runtime.Controller controller runtime.Controller
cfgCh chan<- config.Provider cfgCh chan<- config.Provider
server *grpc.Server server *grpc.Server
mode runtime.Mode
} }
// New initializes and returns a [Server]. // New initializes and returns a [Server].
func New(cfgCh chan<- config.Provider) *Server { func New(cfgCh chan<- config.Provider, mode runtime.Mode) *Server {
if runtimeController == nil { if runtimeController == nil {
panic("runtime controller is not set") panic("runtime controller is not set")
} }
@ -55,6 +57,7 @@ func New(cfgCh chan<- config.Provider) *Server {
return &Server{ return &Server{
controller: runtimeController, controller: runtimeController,
cfgCh: cfgCh, cfgCh: cfgCh,
mode: mode,
} }
} }
@ -73,6 +76,10 @@ func (s *Server) Register(obj *grpc.Server) {
// ApplyConfiguration implements [machine.MachineServiceServer]. // ApplyConfiguration implements [machine.MachineServiceServer].
func (s *Server) ApplyConfiguration(_ context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) { func (s *Server) ApplyConfiguration(_ context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
//nolint:exhaustive //nolint:exhaustive
switch in.Mode { switch in.Mode {
case machine.ApplyConfigurationRequest_TRY: case machine.ApplyConfigurationRequest_TRY:
@ -117,6 +124,10 @@ Node is running in maintenance mode and does not have a config yet.`
// GenerateConfiguration implements the [machine.MachineServiceServer] interface. // GenerateConfiguration implements the [machine.MachineServiceServer] interface.
func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) { func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if in.MachineConfig == nil { if in.MachineConfig == nil {
return nil, errors.New("invalid generate request") return nil, errors.New("invalid generate request")
} }
@ -162,6 +173,10 @@ func (s *Server) Version(ctx context.Context, _ *emptypb.Empty) (*machine.Versio
// Upgrade initiates an upgrade. // Upgrade initiates an upgrade.
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) { func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if err = s.assertAdminRole(ctx); err != nil { if err = s.assertAdminRole(ctx); err != nil {
return nil, err return nil, err
} }
@ -211,6 +226,10 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
// //
//nolint:gocyclo //nolint:gocyclo
func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (*machine.ResetResponse, error) { func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (*machine.ResetResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if err := s.assertAdminRole(ctx); err != nil { if err := s.assertAdminRole(ctx); err != nil {
return nil, err return nil, err
} }

View File

@ -16,6 +16,16 @@ import (
"github.com/siderolabs/talos/pkg/machinery/api/resource/definitions/block" "github.com/siderolabs/talos/pkg/machinery/api/resource/definitions/block"
) )
// Empty is an empty CEL environment.
//
// The environment is created with cel.NewEnv without any options. It is
// wrapped in sync.OnceValue, so it is built on the first call and the same
// value is returned afterwards.
var Empty = sync.OnceValue(func() *cel.Env {
env, err := cel.NewEnv()
if err != nil {
// creating the environment failed; there is no error return here, so panic
panic(err)
}
return env
})
// DiskLocator is a disk locator CEL environment. // DiskLocator is a disk locator CEL environment.
var DiskLocator = sync.OnceValue(func() *cel.Env { var DiskLocator = sync.OnceValue(func() *cel.Env {
var diskSpec block.DiskSpec var diskSpec block.DiskSpec

View File

@ -1094,6 +1094,9 @@ const (
// //
// Note: 116 = 't' and 108 = 'l' in ASCII. // Note: 116 = 't' and 108 = 'l' in ASCII.
HostDNSAddress = "169.254.116.108" HostDNSAddress = "169.254.116.108"
// MetalAgentModeFlagPath is the path to the file indicating if the node is running in Metal Agent mode.
MetalAgentModeFlagPath = "/usr/local/etc/is-metal-agent"
) )
// See https://linux.die.net/man/3/klogctl // See https://linux.die.net/man/3/klogctl