mirror of
https://github.com/siderolabs/talos.git
synced 2025-10-11 23:51:11 +02:00
Fixes: https://github.com/siderolabs/talos/issues/6815 Additionally, make it possible to run reset in maintenance mode, which provides a way to reset the system disk and remove all traces of Talos from it. The new reset flow works in a separate sequence, and the disk probe lookup now checks the boot partition instead of the ephemeral one. Signed-off-by: Artem Chernyshev <artem.chernyshev@talos-systems.com>
292 lines
8.6 KiB
Go
292 lines
8.6 KiB
Go
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
package server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
|
|
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
|
|
"github.com/cosi-project/runtime/pkg/state"
|
|
"github.com/cosi-project/runtime/pkg/state/protobuf/server"
|
|
"github.com/google/uuid"
|
|
"github.com/siderolabs/go-blockdevice/blockdevice"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
"google.golang.org/protobuf/types/known/emptypb"
|
|
|
|
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
|
|
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/disk"
|
|
"github.com/siderolabs/talos/internal/app/resources"
|
|
storaged "github.com/siderolabs/talos/internal/app/storaged"
|
|
"github.com/siderolabs/talos/internal/pkg/configuration"
|
|
"github.com/siderolabs/talos/pkg/machinery/api/machine"
|
|
"github.com/siderolabs/talos/pkg/machinery/api/resource"
|
|
"github.com/siderolabs/talos/pkg/machinery/api/storage"
|
|
"github.com/siderolabs/talos/pkg/machinery/config/configloader"
|
|
v1alpha1machine "github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/machine"
|
|
"github.com/siderolabs/talos/pkg/machinery/constants"
|
|
"github.com/siderolabs/talos/pkg/version"
|
|
)
|
|
|
|
// Server implements machine.MachineService, network.NetworkService, and storage.StorageService.
|
|
type Server struct {
|
|
machine.UnimplementedMachineServiceServer
|
|
|
|
controller runtime.Controller
|
|
logger *log.Logger
|
|
cfgCh chan<- []byte
|
|
server *grpc.Server
|
|
}
|
|
|
|
// New initializes and returns a `Server`.
|
|
func New(c runtime.Controller, logger *log.Logger, cfgCh chan<- []byte) *Server {
|
|
return &Server{
|
|
controller: c,
|
|
logger: logger,
|
|
cfgCh: cfgCh,
|
|
}
|
|
}
|
|
|
|
// Register implements the factory.Registrator interface.
|
|
func (s *Server) Register(obj *grpc.Server) {
|
|
s.server = obj
|
|
|
|
// wrap resources with access filter
|
|
resourceState := s.controller.Runtime().State().V1Alpha2().Resources()
|
|
resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState)))
|
|
|
|
storage.RegisterStorageServiceServer(obj, &storaged.Server{Controller: s.controller})
|
|
machine.RegisterMachineServiceServer(obj, s)
|
|
resource.RegisterResourceServiceServer(obj, &resources.Server{Resources: resourceState}) //nolint:staticcheck
|
|
cosiv1alpha1.RegisterStateServer(obj, server.NewState(resourceState))
|
|
}
|
|
|
|
// ApplyConfiguration implements machine.MachineService.
|
|
func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfigurationRequest) (*machine.ApplyConfigurationResponse, error) {
|
|
//nolint:exhaustive
|
|
switch in.Mode {
|
|
case machine.ApplyConfigurationRequest_TRY:
|
|
fallthrough
|
|
case machine.ApplyConfigurationRequest_REBOOT:
|
|
fallthrough
|
|
case machine.ApplyConfigurationRequest_AUTO:
|
|
default:
|
|
return nil, status.Errorf(codes.Unimplemented, "apply configuration --mode='%s' is not supported in maintenance mode",
|
|
strings.ReplaceAll(strings.ToLower(in.Mode.String()), "_", "-"))
|
|
}
|
|
|
|
cfgProvider, err := configloader.NewFromBytes(in.GetData())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse config: %w", err)
|
|
}
|
|
|
|
warnings, err := cfgProvider.Validate(s.controller.Runtime().State().Platform().Mode())
|
|
if err != nil {
|
|
return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err)
|
|
}
|
|
|
|
reply := &machine.ApplyConfigurationResponse{
|
|
Messages: []*machine.ApplyConfiguration{
|
|
{
|
|
Warnings: warnings,
|
|
},
|
|
},
|
|
}
|
|
|
|
if in.DryRun {
|
|
reply.Messages[0].ModeDetails = `Dry run summary:
|
|
Node is running in maintenance mode and does not have a config yet.`
|
|
|
|
return reply, nil
|
|
}
|
|
|
|
s.cfgCh <- in.GetData()
|
|
|
|
return reply, nil
|
|
}
|
|
|
|
// GenerateConfiguration implements the machine.MachineServer interface.
|
|
func (s *Server) GenerateConfiguration(ctx context.Context, in *machine.GenerateConfigurationRequest) (*machine.GenerateConfigurationResponse, error) {
|
|
if in.MachineConfig == nil {
|
|
return nil, fmt.Errorf("invalid generate request")
|
|
}
|
|
|
|
machineType := v1alpha1machine.Type(in.MachineConfig.Type)
|
|
|
|
if machineType == v1alpha1machine.TypeWorker {
|
|
return nil, fmt.Errorf("join config can't be generated in the maintenance mode")
|
|
}
|
|
|
|
return configuration.Generate(ctx, in)
|
|
}
|
|
|
|
// GenerateClientConfiguration implements the machine.MachineServer interface.
|
|
func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.GenerateClientConfigurationRequest) (*machine.GenerateClientConfigurationResponse, error) {
|
|
return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode")
|
|
}
|
|
|
|
// Version implements the machine.MachineServer interface.
|
|
func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) {
|
|
if err := assertPeerSideroLink(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var platform *machine.PlatformInfo
|
|
|
|
if s.controller.Runtime().State().Platform() != nil {
|
|
platform = &machine.PlatformInfo{
|
|
Name: s.controller.Runtime().State().Platform().Name(),
|
|
Mode: s.controller.Runtime().State().Platform().Mode().String(),
|
|
}
|
|
}
|
|
|
|
return &machine.VersionResponse{
|
|
Messages: []*machine.Version{
|
|
{
|
|
Version: version.NewVersion(),
|
|
Platform: platform,
|
|
},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// Upgrade initiates an upgrade.
|
|
//
|
|
//nolint:gocyclo,cyclop
|
|
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
|
|
if err = assertPeerSideroLink(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if s.controller.Runtime().State().Machine().Disk() == nil {
|
|
return nil, status.Errorf(codes.FailedPrecondition, "Talos is not installed")
|
|
}
|
|
|
|
actorID := uuid.New().String()
|
|
|
|
mode := s.controller.Runtime().State().Platform().Mode()
|
|
|
|
if !mode.Supports(runtime.Upgrade) {
|
|
return nil, status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
|
|
}
|
|
|
|
// none of the options are supported in maintenance mode
|
|
if in.GetPreserve() || in.GetStage() || in.GetForce() {
|
|
return nil, status.Errorf(codes.Unimplemented, "upgrade --preserve, --stage, and --force are not supported in maintenance mode")
|
|
}
|
|
|
|
log.Printf("upgrade request received: %q", in.GetImage())
|
|
|
|
runCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID)
|
|
|
|
go func() {
|
|
if err := s.controller.Run(runCtx, runtime.SequenceMaintenanceUpgrade, in); err != nil {
|
|
if !runtime.IsRebootError(err) {
|
|
log.Println("upgrade failed:", err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
reply = &machine.UpgradeResponse{
|
|
Messages: []*machine.Upgrade{
|
|
{
|
|
Ack: "Upgrade request received",
|
|
ActorId: actorID,
|
|
},
|
|
},
|
|
}
|
|
|
|
return reply, nil
|
|
}
|
|
|
|
// Reset resets the node.
|
|
//
|
|
//nolint:gocyclo
|
|
func (s *Server) Reset(ctx context.Context, in *machine.ResetRequest) (reply *machine.ResetResponse, err error) {
|
|
if err = assertPeerSideroLink(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if in.UserDisksToWipe != nil && in.Mode == machine.ResetRequest_SYSTEM_DISK {
|
|
return nil, fmt.Errorf("reset failed: invalid input, wipe mode SYSTEM_DISK doesn't support UserDisksToWipe parameter")
|
|
}
|
|
|
|
actorID := uuid.New().String()
|
|
|
|
log.Printf("reset request received. actorID: %s", actorID)
|
|
|
|
if len(in.GetSystemPartitionsToWipe()) > 0 {
|
|
return nil, fmt.Errorf("system partitions to wipe params is not supported in the maintenance mode")
|
|
}
|
|
|
|
var dev *blockdevice.BlockDevice
|
|
|
|
disk := s.controller.Runtime().State().Machine().Disk(disk.WithPartitionLabel(constants.BootPartitionLabel))
|
|
|
|
if disk == nil {
|
|
return nil, fmt.Errorf("reset failed: Talos is not installed")
|
|
}
|
|
|
|
dev, err = blockdevice.Open(disk.Device().Name())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer dev.Close() //nolint:errcheck
|
|
|
|
if err = dev.Reset(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resetCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID)
|
|
|
|
if in.Mode != machine.ResetRequest_SYSTEM_DISK {
|
|
for _, deviceName := range in.UserDisksToWipe {
|
|
var dev *blockdevice.BlockDevice
|
|
|
|
dev, err = blockdevice.Open(deviceName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer dev.Close() //nolint:errcheck
|
|
|
|
log.Printf("wiping user disk %s", deviceName)
|
|
|
|
err = dev.FastWipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
|
|
go func() {
|
|
sequence := runtime.SequenceShutdown
|
|
if in.Reboot {
|
|
sequence = runtime.SequenceReboot
|
|
}
|
|
|
|
if err := s.controller.Run(resetCtx, sequence, in); err != nil {
|
|
if !runtime.IsRebootError(err) {
|
|
log.Println("reset failed:", err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
reply = &machine.ResetResponse{
|
|
Messages: []*machine.Reset{
|
|
{
|
|
ActorId: actorID,
|
|
},
|
|
},
|
|
}
|
|
|
|
return reply, nil
|
|
}
|