feat: introduce metal agent mode

Introduce a new operating mode called the metal agent mode.

The mode is activated by the presence of a `/usr/local/etc/is-metal-agent` file under the root FS.

In this mode, Talos will:
- Only run the Initialize sequence; it will not be followed by the install/boot sequences.
- Mark STATE partitions as `missing`, so Talos will always be in the "not installed" state.
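- Block applying configuration via the API while in maintenance mode: the ApplyConfiguration, GenerateConfiguration, Upgrade and Reset calls return an `Unimplemented` error.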

This mode can be used, e.g., to collect hardware information from bare-metal servers.

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
This commit is contained in:
Utku Ozdemir 2024-10-09 23:12:46 +02:00
parent 0e15955fcc
commit 2136358d65
No known key found for this signature in database
GPG Key ID: DBD13117B0A14E93
11 changed files with 99 additions and 14 deletions

View File

@ -16,6 +16,7 @@ import (
"github.com/siderolabs/go-blockdevice/v2/encryption"
"go.uber.org/zap"
machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/pkg/partition"
"github.com/siderolabs/talos/pkg/machinery/cel"
"github.com/siderolabs/talos/pkg/machinery/cel/celenv"
@ -28,8 +29,12 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)
// noMatch is a CEL boolean expression parsed from the literal "false" in the empty CEL environment.
// It is used as the STATE volume locator match in agent mode, so that the volume is marked as missing.
var noMatch = cel.MustExpression(cel.ParseBooleanExpression("false", celenv.Empty()))
// VolumeConfigController provides volume configuration based on Talos defaults and machine configuration.
type VolumeConfigController struct{}
type VolumeConfigController struct {
// V1Alpha1Mode is the runtime mode the controller operates in.
// When it is an agent mode, the STATE volume locator is replaced with noMatch.
V1Alpha1Mode machinedruntime.Mode
}
// Name implements controller.Controller interface.
func (ctrl *VolumeConfigController) Name() string {
@ -291,8 +296,14 @@ func (ctrl *VolumeConfigController) manageStateNoConfig(encryptionMeta *runtime.
TargetPath: constants.StateMountPoint,
}
match := labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel)
if ctrl.V1Alpha1Mode.IsAgent() { // mark as missing
match = noMatch
}
// in agent mode the locator uses noMatch, so the STATE volume is never matched
vc.TypedSpec().Locator = block.LocatorSpec{
Match: labelVolumeMatchAndNonEmpty(constants.StatePartitionLabel),
Match: match,
}
if encryptionMeta != nil {

View File

@ -25,6 +25,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
machinedruntime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/maintenance"
"github.com/siderolabs/talos/pkg/grpc/factory"
"github.com/siderolabs/talos/pkg/grpc/middleware/authz"
@ -37,6 +38,7 @@ import (
// MaintenanceServiceController runs the maintenance service based on the configuration.
type MaintenanceServiceController struct {
// SiderolinkPeerCheckFunc is an authz peer-check callback.
// NOTE(review): presumably used to detect SideroLink peers for the authz injector — confirm against Run.
SiderolinkPeerCheckFunc authz.SideroLinkPeerCheckFunc
// V1Alpha1Mode is the runtime mode; it is passed to the maintenance server,
// and in agent mode the "upload configuration" usage hint is not printed.
V1Alpha1Mode machinedruntime.Mode
}
// Name implements controller.Controller interface.
@ -122,7 +124,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
defer shutdownServer(context.Background())
cfgCh := make(chan machineryconfig.Provider)
srv := maintenance.New(cfgCh)
srv := maintenance.New(cfgCh, ctrl.V1Alpha1Mode)
injector := &authz.Injector{
Mode: authz.ReadOnlyWithAdminOnSiderolink,
@ -289,7 +291,7 @@ func (ctrl *MaintenanceServiceController) Run(ctx context.Context, r controller.
lastCertificateFingerprint = fingerprint
}
if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" {
if !usagePrinted && len(reachableAddresses) > 0 && lastCertificateFingerprint != "" && !ctrl.V1Alpha1Mode.IsAgent() {
firstIP := reachableAddresses[0]
logger.Sugar().Info("upload configuration using talosctl:")

View File

@ -21,6 +21,8 @@ const (
ModeContainer
// ModeMetal is the metal runtime mode.
ModeMetal
// ModeMetalAgent is the metal agent runtime mode.
ModeMetalAgent
)
const (
@ -40,11 +42,12 @@ const (
cloud = "cloud"
container = "container"
metal = "metal"
metalAgent = "metal-agent"
)
// String returns the string representation of a Mode.
func (m Mode) String() string {
return [...]string{cloud, container, metal}[m]
return [...]string{cloud, container, metal, metalAgent}[m]
}
// RequiresInstall implements config.RuntimeMode.
@ -62,6 +65,11 @@ func (m Mode) Supports(feature ModeCapability) bool {
return (m.capabilities() & uint64(feature)) != 0
}
// IsAgent returns true if the mode is an agent mode (i.e. metal agent mode).
//
// ModeMetalAgent is the only mode for which it returns true.
func (m Mode) IsAgent() bool {
return m == ModeMetalAgent
}
// ParseMode returns a `Mode` that matches the specified string.
func ParseMode(s string) (mod Mode, err error) {
switch s {
@ -71,6 +79,8 @@ func ParseMode(s string) (mod Mode, err error) {
mod = ModeContainer
case metal:
mod = ModeMetal
case metalAgent:
mod = ModeMetalAgent
default:
return mod, fmt.Errorf("unknown runtime mode: %q", s)
}

View File

@ -43,7 +43,9 @@ const (
)
// Metal is a discoverer for non-cloud environments.
type Metal struct{}
type Metal struct {
// IsAgent selects the mode reported by Mode: runtime.ModeMetalAgent when true,
// runtime.ModeMetal otherwise.
IsAgent bool
}
// Name implements the platform.Platform interface.
func (m *Metal) Name() string {
@ -118,6 +120,10 @@ func (m *Metal) Configuration(ctx context.Context, r state.State) ([]byte, error
// Mode implements the platform.Platform interface.
//
// It reports runtime.ModeMetalAgent when the IsAgent flag is set and
// runtime.ModeMetal otherwise.
func (m *Metal) Mode() runtime.Mode {
	mode := runtime.ModeMetal

	if m.IsAgent {
		mode = runtime.ModeMetalAgent
	}

	return mode
}

View File

@ -113,7 +113,11 @@ func newPlatform(platform string) (p runtime.Platform, err error) {
case "hcloud":
p = &hcloud.Hcloud{}
case constants.PlatformMetal:
p = &metal.Metal{}
_, metalAgentCheckErr := os.Stat(constants.MetalAgentModeFlagPath)
p = &metal.Metal{
IsAgent: metalAgentCheckErr == nil,
}
case "opennebula":
p = &opennebula.OpenNebula{}
case "openstack":

View File

@ -65,9 +65,12 @@ func (p PhaseList) AppendList(list PhaseList) PhaseList {
// Initialize is the initialize sequence. The primary goals of this sequence is
// to load the config and enforce kernel security requirements.
func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
mode := r.State().Platform().Mode()
phases := PhaseList{}
switch r.State().Platform().Mode() { //nolint:exhaustive
phases = phases.Append("logMode", LogMode)
switch mode { //nolint:exhaustive
case runtime.ModeContainer:
phases = phases.Append(
"systemRequirements",
@ -118,6 +121,10 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
ReloadMeta,
).AppendWithDeferredCheck(
func() bool {
if mode == runtime.ModeMetalAgent {
return false
}
disabledStr := procfs.ProcCmdline().Get(constants.KernelParamDashboardDisabled).First()
disabled, _ := strconv.ParseBool(pointer.SafeDeref(disabledStr)) //nolint:errcheck

View File

@ -120,6 +120,15 @@ func WaitForUSB(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
}, "waitForUSB"
}
// LogMode represents the LogMode task.
//
// The returned task logs the mode reported by the runtime's platform and
// always succeeds; the task name is "logMode".
func LogMode(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
	task := func(_ context.Context, logger *log.Logger, r runtime.Runtime) error {
		mode := r.State().Platform().Mode()

		logger.Printf("running in mode: %s", mode)

		return nil
	}

	return task, "logMode"
}
// EnforceKSPPRequirements represents the EnforceKSPPRequirements task.
func EnforceKSPPRequirements(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) (err error) {

View File

@ -96,7 +96,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
&block.LVMActivationController{},
&block.SystemDiskController{},
&block.UserDiskConfigController{},
&block.VolumeConfigController{},
&block.VolumeConfigController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&block.VolumeManagerController{},
&cluster.AffiliateMergeController{},
cluster.NewConfigController(),
@ -314,7 +316,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
Drainer: drainer,
},
&runtimecontrollers.MaintenanceConfigController{},
&runtimecontrollers.MaintenanceServiceController{},
&runtimecontrollers.MaintenanceServiceController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&runtimecontrollers.MachineStatusController{
V1Alpha1Events: ctrl.v1alpha1Runtime.Events(),
},

View File

@ -44,10 +44,12 @@ type Server struct {
controller runtime.Controller
cfgCh chan<- config.Provider
server *grpc.Server
mode runtime.Mode
}
// New initializes and returns a [Server].
func New(cfgCh chan<- config.Provider) *Server {
func New(cfgCh chan<- config.Provider, mode runtime.Mode) *Server {
if runtimeController == nil {
panic("runtime controller is not set")
}
@ -55,6 +57,7 @@ func New(cfgCh chan<- config.Provider) *Server {
return &Server{
controller: runtimeController,
cfgCh: cfgCh,
mode: mode,
}
}
@ -73,6 +76,10 @@ func (s *Server) Register(obj *grpc.Server) {
// ApplyConfiguration implements [machine.MachineServiceServer].
func (s *Server) ApplyConfiguration(_ context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
//nolint:exhaustive
switch in.Mode {
case machine.ApplyConfigurationRequest_TRY:
@ -117,6 +124,10 @@ Node is running in maintenance mode and does not have a config yet.`
// GenerateConfiguration implements the [machine.MachineServiceServer] interface.
func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if in.MachineConfig == nil {
return nil, errors.New("invalid generate request")
}
@ -162,6 +173,10 @@ func (s *Server) Version(ctx context.Context, _ *emptypb.Empty) (*machine.Versio
// Upgrade initiates an upgrade.
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if err = s.assertAdminRole(ctx); err != nil {
return nil, err
}
@ -211,6 +226,10 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
//
//nolint:gocyclo
func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (*machine.ResetResponse, error) {
if s.mode.IsAgent() {
return nil, status.Error(codes.Unimplemented, "API is not implemented in agent mode")
}
if err := s.assertAdminRole(ctx); err != nil {
return nil, err
}

View File

@ -16,6 +16,16 @@ import (
"github.com/siderolabs/talos/pkg/machinery/api/resource/definitions/block"
)
// Empty is an empty CEL environment.
//
// The environment is created with no options; it is wrapped in sync.OnceValue,
// so it is built on the first call and the same value is returned afterwards.
var Empty = sync.OnceValue(func() *cel.Env {
	emptyEnv, buildErr := cel.NewEnv()
	if buildErr != nil {
		// No options are passed to cel.NewEnv, so a failure here is treated as fatal.
		panic(buildErr)
	}

	return emptyEnv
})
// DiskLocator is a disk locator CEL environment.
var DiskLocator = sync.OnceValue(func() *cel.Env {
var diskSpec block.DiskSpec

View File

@ -1094,6 +1094,9 @@ const (
//
// Note: 116 = 't' and 108 = 'l' in ASCII.
HostDNSAddress = "169.254.116.108"
// MetalAgentModeFlagPath is the path to the file indicating if the node is running in Metal Agent mode.
MetalAgentModeFlagPath = "/usr/local/etc/is-metal-agent"
)
// See https://linux.die.net/man/3/klogctl