diff --git a/cmd/talosctl/cmd/talos/upgrade.go b/cmd/talosctl/cmd/talos/upgrade.go index a8e29d096..f804c7bd9 100644 --- a/cmd/talosctl/cmd/talos/upgrade.go +++ b/cmd/talosctl/cmd/talos/upgrade.go @@ -28,6 +28,7 @@ var upgradeCmdFlags struct { force bool wait bool debug bool + insecure bool } // upgradeCmd represents the processes command. @@ -41,6 +42,10 @@ var upgradeCmd = &cobra.Command{ upgradeCmdFlags.wait = true } + if upgradeCmdFlags.wait && upgradeCmdFlags.insecure { + return fmt.Errorf("cannot use --wait and --insecure together") + } + if !upgradeCmdFlags.wait { return runUpgradeNoWait() } @@ -58,7 +63,7 @@ var upgradeCmd = &cobra.Command{ } func runUpgradeNoWait() error { - return WithClient(func(ctx context.Context, c *client.Client) error { + upgradeFn := func(ctx context.Context, c *client.Client) error { if err := helpers.ClientVersionCheck(ctx, c); err != nil { return err } @@ -98,7 +103,13 @@ func runUpgradeNoWait() error { } return w.Flush() - }) + } + + if upgradeCmdFlags.insecure { + return WithClientMaintenance(nil, upgradeFn) + } + + return WithClient(upgradeFn) } func upgradeGetActorID(ctx context.Context, c *client.Client) (string, error) { @@ -127,5 +138,6 @@ func init() { upgradeCmd.Flags().BoolVarP(&upgradeCmdFlags.force, "force", "f", false, "force the upgrade (skip checks on etcd health and members, might lead to data loss)") upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.wait, "wait", false, "wait for the operation to complete, tracking its progress. always set to true when --debug is set") upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.debug, "debug", false, "debug operation from kernel logs. --no-wait is set to false when this flag is set") + upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.insecure, "insecure", false, "upgrade using the insecure (encrypted with no auth) maintenance service") addCommand(upgradeCmd) } diff --git a/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go b/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go index 38210e3bd..4106d3bf1 100644 --- a/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go +++ b/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go @@ -22,11 +22,6 @@ import ( "syscall" "time" - "github.com/containerd/containerd" - "github.com/containerd/containerd/cio" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/oci" criconstants "github.com/containerd/containerd/pkg/cri/constants" cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1" "github.com/cosi-project/runtime/pkg/safe" @@ -55,7 +50,6 @@ import ( "google.golang.org/protobuf/types/known/emptypb" installer "github.com/talos-systems/talos/cmd/installer/pkg/install" - "github.com/talos-systems/talos/internal/app/machined/internal/install" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/disk" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" @@ -68,8 +62,8 @@ import ( "github.com/talos-systems/talos/internal/pkg/containers" taloscontainerd "github.com/talos-systems/talos/internal/pkg/containers/containerd" "github.com/talos-systems/talos/internal/pkg/containers/cri" - "github.com/talos-systems/talos/internal/pkg/containers/image" "github.com/talos-systems/talos/internal/pkg/etcd" + "github.com/talos-systems/talos/internal/pkg/install" "github.com/talos-systems/talos/internal/pkg/miniprocfs" "github.com/talos-systems/talos/internal/pkg/mount" "github.com/talos-systems/talos/pkg/archiver" @@ -84,7 +78,6 @@ import ( "github.com/talos-systems/talos/pkg/machinery/api/storage" timeapi "github.com/talos-systems/talos/pkg/machinery/api/time" clientconfig "github.com/talos-systems/talos/pkg/machinery/client/config" - "github.com/talos-systems/talos/pkg/machinery/config" "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1" "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/generate" machinetype "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine" @@ -125,7 +118,7 @@ func (s *Server) checkSupported(feature runtime.ModeCapability) error { mode := s.Controller.Runtime().State().Platform().Mode() if !mode.Supports(feature) { - return fmt.Errorf("method is not supported in %s mode", mode.String()) + return status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String()) } return nil @@ -500,7 +493,7 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply log.Printf("validating %q", in.GetImage()) - if err = pullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil { + if err = install.PullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil { return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err) } @@ -1403,94 +1396,6 @@ func sendEmptyEvent(req *machine.EventsRequest, l machine.MachineService_EventsS return l.Send(emptyEvent) } -//nolint:gocyclo -func pullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error { - // Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry - containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace) - - const containerID = "validate" - - client, err := containerd.New(constants.SystemContainerdAddress) - if err != nil { - return err - } - - defer client.Close() //nolint:errcheck - - img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled()) - if err != nil { - return err - } - - // See if there's previous container/snapshot to clean up - var oldcontainer containerd.Container - - if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil { - if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil { - return fmt.Errorf("error deleting old container instance: %w", err) - } - } - - if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) { - return fmt.Errorf("error cleaning up stale snapshot: %w", err) - } - - // Launch the container with a known help command for a simple check to make sure the image is valid - args := []string{ - "/bin/installer", - "--help", - } - - specOpts := []oci.SpecOpts{ - oci.WithImageConfig(img), - oci.WithProcessArgs(args...), - } - - containerOpts := []containerd.NewContainerOpts{ - containerd.WithImage(img), - containerd.WithNewSnapshot(containerID, img), - containerd.WithNewSpec(specOpts...), - } - - container, err := client.NewContainer(containerdctx, containerID, containerOpts...) - if err != nil { - return err - } - - //nolint:errcheck - defer container.Delete(containerdctx, containerd.WithSnapshotCleanup) - - task, err := container.NewTask(containerdctx, cio.NullIO) - if err != nil { - return err - } - - //nolint:errcheck - defer task.Delete(containerdctx) - - exitStatusC, err := task.Wait(containerdctx) - if err != nil { - return err - } - - if err = task.Start(containerdctx); err != nil { - return err - } - - status := <-exitStatusC - - code, _, err := status.Result() - if err != nil { - return err - } - - if code != 0 { - return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer") - } - - return nil -} - // Containers implements the machine.MachineServer interface. func (s *Server) Containers(ctx context.Context, in *machine.ContainersRequest) (reply *machine.ContainersResponse, err error) { inspector, err := getContainerInspector(ctx, in.Namespace, in.Driver) diff --git a/internal/app/machined/main.go b/internal/app/machined/main.go index 87f20aa7f..4811eac7b 100644 --- a/internal/app/machined/main.go +++ b/internal/app/machined/main.go @@ -28,6 +28,7 @@ import ( "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" "github.com/talos-systems/talos/internal/app/machined/pkg/system" "github.com/talos-systems/talos/internal/app/machined/pkg/system/services" + "github.com/talos-systems/talos/internal/app/maintenance" "github.com/talos-systems/talos/internal/app/poweroff" "github.com/talos-systems/talos/internal/app/trustd" "github.com/talos-systems/talos/internal/pkg/mount" @@ -197,11 +198,10 @@ func run() error { drainer := runtime.NewDrainer() defer func() { - c, cancel := context.WithTimeout(context.Background(), time.Second*10) + drainCtx, drainCtxCancel := context.WithTimeout(context.Background(), time.Second*10) + defer drainCtxCancel() - defer cancel() - - if e := drainer.Drain(c); e != nil { + if e := drainer.Drain(drainCtx); e != nil { log.Printf("WARNING: failed to drain controllers: %s", e) } }() @@ -227,25 +227,37 @@ func run() error { log.Printf("controller runtime finished") }() + // Inject controller into maintenance service. + maintenance.InjectController(c) + + initializeCanceled := false + // Initialize the machine. if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil { - return err + if errors.Is(err, context.Canceled) { + initializeCanceled = true + } else { + return err + } } - // Perform an installation if required. - if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil { - return err - } + // If Initialize sequence was canceled, don't run any other sequence. + if !initializeCanceled { + // Perform an installation if required. + if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil { + return err + } - // Start the machine API. - system.Services(c.Runtime()).LoadAndStart( - &services.Machined{Controller: c}, - &services.APID{}, - ) + // Start the machine API. + system.Services(c.Runtime()).LoadAndStart( + &services.Machined{Controller: c}, + &services.APID{}, + ) - // Boot the machine. - if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) { - return err + // Boot the machine. + if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) { + return err + } } // Watch and handle runtime events. diff --git a/internal/app/machined/pkg/controllers/runtime/machine_status.go b/internal/app/machined/pkg/controllers/runtime/machine_status.go index b1053d543..2e61e228b 100644 --- a/internal/app/machined/pkg/controllers/runtime/machine_status.go +++ b/internal/app/machined/pkg/controllers/runtime/machine_status.go @@ -353,7 +353,7 @@ func (ctrl *MachineStatusController) watchEvents() { // install sequence is run always, even if the machine is already installed, so we'll catch it by phase name case v1alpha1runtime.SequenceShutdown.String(): newStage = runtime.MachineStageShuttingDown - case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(): + case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(), v1alpha1runtime.SequenceMaintenanceUpgrade.String(): newStage = runtime.MachineStageUpgrading case v1alpha1runtime.SequenceReset.String(): newStage = runtime.MachineStageResetting @@ -375,7 +375,9 @@ func (ctrl *MachineStatusController) watchEvents() { case currentSequence == v1alpha1runtime.SequenceInstall.String() && event.Phase == "install": newStage = runtime.MachineStageInstalling case (currentSequence == v1alpha1runtime.SequenceInstall.String() || - currentSequence == v1alpha1runtime.SequenceUpgrade.String()) && event.Phase == "kexec": + currentSequence == v1alpha1runtime.SequenceUpgrade.String() || + currentSequence == v1alpha1runtime.SequenceStageUpgrade.String() || + currentSequence == v1alpha1runtime.SequenceMaintenanceUpgrade.String()) && event.Phase == "kexec": newStage = runtime.MachineStageRebooting } } diff --git a/internal/app/machined/pkg/runtime/sequencer.go b/internal/app/machined/pkg/runtime/sequencer.go index 5d5407ab3..b7c3ccdbd 100644 --- a/internal/app/machined/pkg/runtime/sequencer.go +++ b/internal/app/machined/pkg/runtime/sequencer.go @@ -28,6 +28,8 @@ const ( SequenceUpgrade // SequenceStageUpgrade is the stage upgrade sequence. SequenceStageUpgrade + // SequenceMaintenanceUpgrade is the upgrade sequence in maintenance mode. + SequenceMaintenanceUpgrade // SequenceReset is the reset sequence. SequenceReset // SequenceReboot is the reboot sequence. @@ -35,18 +37,22 @@ const ( ) const ( - boot = "boot" - initialize = "initialize" - install = "install" - shutdown = "shutdown" - upgrade = "upgrade" - stageUpgrade = "stageUpgrade" - reset = "reset" - reboot = "reboot" - noop = "noop" + boot = "boot" + initialize = "initialize" + install = "install" + shutdown = "shutdown" + upgrade = "upgrade" + stageUpgrade = "stageUpgrade" + maintenanceUpgrade = "maintenanceUpgrade" + reset = "reset" + reboot = "reboot" + noop = "noop" ) var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{ + SequenceInitialize: { + SequenceMaintenanceUpgrade: {}, + }, SequenceBoot: { SequenceReboot: {}, SequenceReset: {}, @@ -62,7 +68,7 @@ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{ // String returns the string representation of a `Sequence`. func (s Sequence) String() string { - return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, reset, reboot}[s] + return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot}[s] } // CanTakeOver defines sequences priority. @@ -141,6 +147,7 @@ type Sequencer interface { Shutdown(Runtime, *machine.ShutdownRequest) []Phase StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase Upgrade(Runtime, *machine.UpgradeRequest) []Phase + MaintenanceUpgrade(Runtime, *machine.UpgradeRequest) []Phase } // EventSequenceStart represents the sequence start event. diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller.go index f1da8c304..623d5314b 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller.go @@ -336,12 +336,8 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P case runtime.SequenceInstall: phases = c.s.Install(c.r) case runtime.SequenceShutdown: - var ( - in *machine.ShutdownRequest - ok bool - ) - - if in, ok = data.(*machine.ShutdownRequest); !ok { + in, ok := data.(*machine.ShutdownRequest) + if !ok { return nil, runtime.ErrInvalidSequenceData } @@ -349,34 +345,29 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P case runtime.SequenceReboot: phases = c.s.Reboot(c.r) case runtime.SequenceUpgrade: - var ( - in *machine.UpgradeRequest - ok bool - ) - - if in, ok = data.(*machine.UpgradeRequest); !ok { + in, ok := data.(*machine.UpgradeRequest) + if !ok { return nil, runtime.ErrInvalidSequenceData } phases = c.s.Upgrade(c.r, in) case runtime.SequenceStageUpgrade: - var ( - in *machine.UpgradeRequest - ok bool - ) - - if in, ok = data.(*machine.UpgradeRequest); !ok { + in, ok := data.(*machine.UpgradeRequest) + if !ok { return nil, runtime.ErrInvalidSequenceData } phases = c.s.StageUpgrade(c.r, in) - case runtime.SequenceReset: - var ( - in runtime.ResetOptions - ok bool - ) + case runtime.SequenceMaintenanceUpgrade: + in, ok := data.(*machine.UpgradeRequest) + if !ok { + return nil, runtime.ErrInvalidSequenceData + } - if in, ok = data.(runtime.ResetOptions); !ok { + phases = c.s.MaintenanceUpgrade(c.r, in) + case runtime.SequenceReset: + in, ok := data.(runtime.ResetOptions) + if !ok { return nil, runtime.ErrInvalidSequenceData } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller_test.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller_test.go index 6ce9e1cb6..bc7c89b2f 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller_test.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_controller_test.go @@ -59,6 +59,10 @@ func (m *mockSequencer) StageUpgrade(r runtime.Runtime, req *machine.UpgradeRequ return m.phases[runtime.SequenceStageUpgrade] } +func (m *mockSequencer) MaintenanceUpgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase { + return m.phases[runtime.SequenceMaintenanceUpgrade] +} + func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase { return m.phases[runtime.SequenceUpgrade] } diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go index a143efeca..bbd29c93a 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer.go @@ -375,6 +375,44 @@ func (*Sequencer) StageUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) return phases } +// MaintenanceUpgrade is the upgrade sequence in maintenance mode. +func (*Sequencer) MaintenanceUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase { + phases := PhaseList{} + + switch r.State().Platform().Mode() { //nolint:exhaustive + case runtime.ModeContainer: + return nil + default: + phases = phases.Append( + "containerd", + StartContainerd, + ).Append( + "verifyDisk", + VerifyDiskAvailability, + ).Append( + "upgrade", + Upgrade, + ).Append( + "mountBoot", + MountBootPartition, + ).Append( + "kexec", + KexecPrepare, + ).Append( + "unmountBoot", + UnmountBootPartition, + ).Append( + "stopEverything", + StopAllServices, + ).Append( + "reboot", + Reboot, + ) + } + + return phases +} + // Upgrade is the upgrade sequence. func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase { phases := PhaseList{} diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go index 1a3cad3b5..b0b43c53e 100644 --- a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go +++ b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go @@ -43,7 +43,6 @@ import ( "kernel.org/pub/linux/libs/security/libcap/cap" installer "github.com/talos-systems/talos/cmd/installer/pkg/install" - "github.com/talos-systems/talos/internal/app/machined/internal/install" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/adv" @@ -56,6 +55,7 @@ import ( "github.com/talos-systems/talos/internal/app/maintenance" "github.com/talos-systems/talos/internal/pkg/cri" "github.com/talos-systems/talos/internal/pkg/etcd" + "github.com/talos-systems/talos/internal/pkg/install" "github.com/talos-systems/talos/internal/pkg/mount" "github.com/talos-systems/talos/internal/pkg/partition" "github.com/talos-systems/talos/pkg/conditions" @@ -625,7 +625,7 @@ func receiveConfigViaMaintenanceService(ctx context.Context, logger *log.Logger, Task: "runningMaintenance", }) - cfgBytes, err := maintenance.Run(ctx, logger, r) + cfgBytes, err := maintenance.Run(ctx, logger) if err != nil { return nil, fmt.Errorf("maintenance service failed: %w", err) } diff --git a/internal/app/machined/pkg/system/services/containerd.go b/internal/app/machined/pkg/system/services/containerd.go index 667e3ada8..b2c01a124 100644 --- a/internal/app/machined/pkg/system/services/containerd.go +++ b/internal/app/machined/pkg/system/services/containerd.go @@ -94,12 +94,21 @@ func (c *Containerd) Runner(r runtime.Runtime) (runner.Runner, error) { } env := []string{} - for key, val := range r.Config().Machine().Env() { - env = append(env, fmt.Sprintf("%s=%s", key, val)) + + if r.Config() != nil { + for key, val := range r.Config().Machine().Env() { + env = append(env, fmt.Sprintf("%s=%s", key, val)) + } + } + + debug := false + + if r.Config() != nil { + debug = r.Config().Debug() } return restart.New(process.NewRunner( - r.Config().Debug(), + debug, args, runner.WithLoggingManager(r.Logging()), runner.WithEnv(env), diff --git a/internal/app/maintenance/main.go b/internal/app/maintenance/main.go index 443f2e57f..27785a007 100644 --- a/internal/app/maintenance/main.go +++ b/internal/app/maintenance/main.go @@ -32,19 +32,30 @@ import ( "github.com/talos-systems/talos/pkg/machinery/resources/network" ) +var ctrl runtime.Controller + +// InjectController is used to pass the controller into the maintenance service. +func InjectController(c runtime.Controller) { + ctrl = c +} + // Run executes the configuration receiver, returning any configuration it receives. // //nolint:gocyclo -func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) { +func Run(ctx context.Context, logger *log.Logger) ([]byte, error) { + if ctrl == nil { + return nil, fmt.Errorf("controller is not injected") + } + logger.Println("waiting for network address to be ready") - if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil { + if err := network.NewReadyCondition(ctrl.Runtime().State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil { return nil, fmt.Errorf("error waiting for the network to be ready: %w", err) } var sideroLinkAddress netip.Addr - currentAddresses, err := r.State().V1Alpha2().Resources().WatchFor(ctx, + currentAddresses, err := ctrl.Runtime().State().V1Alpha2().Resources().WatchFor(ctx, resource.NewMetadata(network.NamespaceName, network.NodeAddressType, network.NodeAddressCurrentID, resource.VersionUndefined), sideroLinkAddressFinder(&sideroLinkAddress, logger), ) @@ -55,7 +66,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs() // hostname might not be available yet, so use it only if it is available - hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined)) + hostnameStatus, err := ctrl.Runtime().State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined)) if err != nil && !state.IsNotFoundError(err) { return nil, fmt.Errorf("error getting node hostname: %w", err) } @@ -83,7 +94,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er cfgCh := make(chan []byte) - s := server.New(r, logger, cfgCh) + s := server.New(ctrl, logger, cfgCh) injector := &authz.Injector{ Mode: authz.ReadOnly, @@ -152,7 +163,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er return cfg, err case <-ctx.Done(): - return nil, fmt.Errorf("context is done") + return nil, ctx.Err() } } diff --git a/internal/app/maintenance/server/peer.go b/internal/app/maintenance/server/peer.go new file mode 100644 index 000000000..f294d97fe --- /dev/null +++ b/internal/app/maintenance/server/peer.go @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package server + +import ( + "context" + "net" + "net/netip" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/peer" + "google.golang.org/grpc/status" + + "github.com/talos-systems/talos/pkg/machinery/resources/network" +) + +func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool { + remotePeer, ok := peer.FromContext(ctx) + if !ok { + return false + } + + if remotePeer.Addr.Network() != "tcp" { + return false + } + + ip, _, err := net.SplitHostPort(remotePeer.Addr.String()) + if err != nil { + return false + } + + addr, err := netip.ParseAddr(ip) + if err != nil { + return false + } + + return condition(addr) +} + +func assertPeerSideroLink(ctx context.Context) error { + if !verifyPeer(ctx, func(addr netip.Addr) bool { + return network.IsULA(addr, network.ULASideroLink) + }) { + return status.Error(codes.Unimplemented, "API is not implemented in maintenance mode") + } + + return nil +} diff --git a/internal/app/maintenance/server/server.go b/internal/app/maintenance/server/server.go index 450c2b02b..32868cbdc 100644 --- a/internal/app/maintenance/server/server.go +++ b/internal/app/maintenance/server/server.go @@ -8,16 +8,14 @@ import ( "context" "fmt" "log" - "net" - "net/netip" "strings" cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1" "github.com/cosi-project/runtime/pkg/state" "github.com/cosi-project/runtime/pkg/state/protobuf/server" + "github.com/google/uuid" "google.golang.org/grpc" "google.golang.org/grpc/codes" - "google.golang.org/grpc/peer" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/emptypb" @@ -30,7 +28,6 @@ import ( "github.com/talos-systems/talos/pkg/machinery/api/storage" "github.com/talos-systems/talos/pkg/machinery/config/configloader" v1alpha1machine "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine" - "github.com/talos-systems/talos/pkg/machinery/resources/network" "github.com/talos-systems/talos/pkg/version" ) @@ -38,18 +35,18 @@ import ( type Server struct { machine.UnimplementedMachineServiceServer - runtime runtime.Runtime - logger *log.Logger - cfgCh chan []byte - server *grpc.Server + controller runtime.Controller + logger *log.Logger + cfgCh chan<- []byte + server *grpc.Server } // New initializes and returns a `Server`. -func New(r runtime.Runtime, logger *log.Logger, cfgCh chan []byte) *Server { +func New(c runtime.Controller, logger *log.Logger, cfgCh chan<- []byte) *Server { return &Server{ - runtime: r, - logger: logger, - cfgCh: cfgCh, + controller: c, + logger: logger, + cfgCh: cfgCh, } } @@ -58,7 +55,7 @@ func (s *Server) Register(obj *grpc.Server) { s.server = obj // wrap resources with access filter - resourceState := s.runtime.State().V1Alpha2().Resources() + resourceState := s.controller.Runtime().State().V1Alpha2().Resources() resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState))) storage.RegisterStorageServiceServer(obj, &storaged.Server{}) @@ -86,7 +83,7 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig return nil, fmt.Errorf("failed to parse config: %w", err) } - warnings, err := cfgProvider.Validate(s.runtime.State().Platform().Mode()) + warnings, err := cfgProvider.Validate(s.controller.Runtime().State().Platform().Mode()) if err != nil { return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err) } @@ -131,43 +128,18 @@ func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.Ge return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode") } -func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool { - remotePeer, ok := peer.FromContext(ctx) - if !ok { - return false - } - - if remotePeer.Addr.Network() != "tcp" { - return false - } - - ip, _, err := net.SplitHostPort(remotePeer.Addr.String()) - if err != nil { - return false - } - - addr, err := netip.ParseAddr(ip) - if err != nil { - return false - } - - return condition(addr) -} - // Version implements the machine.MachineServer interface. func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) { - if !verifyPeer(ctx, func(addr netip.Addr) bool { - return network.IsULA(addr, network.ULASideroLink) - }) { - return nil, status.Error(codes.Unimplemented, "Version API is not implemented in maintenance mode") + if err := assertPeerSideroLink(ctx); err != nil { + return nil, err } var platform *machine.PlatformInfo - if s.runtime.State().Platform() != nil { + if s.controller.Runtime().State().Platform() != nil { platform = &machine.PlatformInfo{ - Name: s.runtime.State().Platform().Name(), - Mode: s.runtime.State().Platform().Mode().String(), + Name: s.controller.Runtime().State().Platform().Name(), + Mode: s.controller.Runtime().State().Platform().Mode().String(), } } @@ -180,3 +152,52 @@ func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.Versi }, }, nil } + +// Upgrade initiates an upgrade. +// +//nolint:gocyclo,cyclop +func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) { + if err = assertPeerSideroLink(ctx); err != nil { + return nil, err + } + + if s.controller.Runtime().State().Machine().Disk() == nil { + return nil, status.Errorf(codes.FailedPrecondition, "Talos is not installed") + } + + actorID := uuid.New().String() + + mode := s.controller.Runtime().State().Platform().Mode() + + if !mode.Supports(runtime.Upgrade) { + return nil, status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String()) + } + + // none of the options are supported in maintenance mode + if in.GetPreserve() || in.GetStage() || in.GetForce() { + return nil, status.Errorf(codes.Unimplemented, "upgrade --preserve, --stage, and --force are not supported in maintenance mode") + } + + log.Printf("upgrade request received: %q", in.GetImage()) + + runCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID) + + go func() { + if err := s.controller.Run(runCtx, runtime.SequenceMaintenanceUpgrade, in); err != nil { + if !runtime.IsRebootError(err) { + log.Println("upgrade failed:", err) + } + } + }() + + reply = &machine.UpgradeResponse{ + Messages: []*machine.Upgrade{ + { + Ack: "Upgrade request received", + ActorId: actorID, + }, + }, + } + + return reply, nil +} diff --git a/internal/app/machined/internal/install/install.go b/internal/pkg/install/install.go similarity index 85% rename from internal/app/machined/internal/install/install.go rename to internal/pkg/install/install.go index a5a10b961..5cdcc4bdf 100644 --- a/internal/app/machined/internal/install/install.go +++ b/internal/pkg/install/install.go @@ -8,6 +8,7 @@ import ( "bytes" "context" "fmt" + "io" "log" "os" "strconv" @@ -29,6 +30,7 @@ import ( "github.com/talos-systems/talos/internal/pkg/extensions" machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine" "github.com/talos-systems/talos/pkg/machinery/config" + "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1" "github.com/talos-systems/talos/pkg/machinery/constants" ) @@ -46,9 +48,16 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts } } - configBytes, err := cfg.Bytes() - if err != nil { - return err + var ( + registriesConfig config.Registries + extensionsConfig []config.Extension + ) + + if cfg != nil { + registriesConfig = cfg.Machine().Registries() + extensionsConfig = cfg.Machine().Install().Extensions() + } else { + registriesConfig = &v1alpha1.RegistriesConfig{} } ctx, cancel := context.WithCancel(context.Background()) @@ -77,7 +86,7 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts if img == nil || err != nil && errdefs.IsNotFound(err) { log.Printf("pulling %q", ref) - img, err = image.Pull(ctx, cfg.Machine().Registries(), client, ref) + img, err = image.Pull(ctx, registriesConfig, client, ref) } if err != nil { @@ -89,8 +98,10 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts return err } - if err = puller.PullAndMount(ctx, cfg.Machine().Registries(), cfg.Machine().Install().Extensions()); err != nil { - return err + if extensionsConfig != nil { + if err = puller.PullAndMount(ctx, registriesConfig, extensionsConfig); err != nil { + return err + } } defer func() { @@ -205,19 +216,35 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts w := &kmsg.Writer{KmsgWriter: f} - configR := &containerdrunner.StdinCloser{ - Stdin: bytes.NewReader(configBytes), - Closer: make(chan struct{}), + var r interface { + io.Reader + WaitAndClose(context.Context, containerd.Task) } - creator := cio.NewCreator(cio.WithStreams(configR, w, w)) + if cfg != nil { + var configBytes []byte + + configBytes, err = cfg.Bytes() + if err != nil { + return err + } + + r = &containerdrunner.StdinCloser{ + Stdin: bytes.NewReader(configBytes), + Closer: make(chan struct{}), + } + } + + creator := cio.NewCreator(cio.WithStreams(r, w, w)) t, err := container.NewTask(ctx, creator) if err != nil { return err } - go configR.WaitAndClose(ctx, t) + if r != nil { + go r.WaitAndClose(ctx, t) + } defer t.Delete(ctx) //nolint:errcheck @@ -242,10 +269,15 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts // OptionsFromUpgradeRequest builds installer options from upgrade request. func OptionsFromUpgradeRequest(r runtime.Runtime, in *machineapi.UpgradeRequest) []Option { - return []Option{ + opts := []Option{ WithPull(false), WithUpgrade(true), WithForce(!in.GetPreserve()), - WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()), } + + if r.Config() != nil { + opts = append(opts, WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs())) + } + + return opts } diff --git a/internal/app/machined/internal/install/options.go b/internal/pkg/install/options.go similarity index 100% rename from internal/app/machined/internal/install/options.go rename to internal/pkg/install/options.go diff --git a/internal/pkg/install/pull.go b/internal/pkg/install/pull.go new file mode 100644 index 000000000..802eb973f --- /dev/null +++ b/internal/pkg/install/pull.go @@ -0,0 +1,110 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package install + +import ( + "context" + "fmt" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/oci" + + "github.com/talos-systems/talos/internal/pkg/containers/image" + "github.com/talos-systems/talos/pkg/machinery/config" + "github.com/talos-systems/talos/pkg/machinery/constants" +) + +// PullAndValidateInstallerImage pulls down the installer and validates that it can run. +// +//nolint:gocyclo +func PullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error { + // Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry + containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace) + + const containerID = "validate" + + client, err := containerd.New(constants.SystemContainerdAddress) + if err != nil { + return err + } + + defer client.Close() //nolint:errcheck + + img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled()) + if err != nil { + return err + } + + // See if there's previous container/snapshot to clean up + var oldcontainer containerd.Container + + if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil { + if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil { + return fmt.Errorf("error deleting old container instance: %w", err) + } + } + + if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) { + return fmt.Errorf("error cleaning up stale snapshot: %w", err) + } + + // Launch the container with a known help command for a simple check to make sure the image is valid + args := []string{ + "/bin/installer", + "--help", + } + + specOpts := []oci.SpecOpts{ + oci.WithImageConfig(img), + oci.WithProcessArgs(args...), + } + + containerOpts := []containerd.NewContainerOpts{ + containerd.WithImage(img), + containerd.WithNewSnapshot(containerID, img), + containerd.WithNewSpec(specOpts...), + } + + container, err := client.NewContainer(containerdctx, containerID, containerOpts...) + if err != nil { + return err + } + + //nolint:errcheck + defer container.Delete(containerdctx, containerd.WithSnapshotCleanup) + + task, err := container.NewTask(containerdctx, cio.NullIO) + if err != nil { + return err + } + + //nolint:errcheck + defer task.Delete(containerdctx) + + exitStatusC, err := task.Wait(containerdctx) + if err != nil { + return err + } + + if err = task.Start(containerdctx); err != nil { + return err + } + + status := <-exitStatusC + + code, _, err := status.Result() + if err != nil { + return err + } + + if code != 0 { + return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer") + } + + return nil +} diff --git a/website/content/v1.3/reference/cli.md b/website/content/v1.3/reference/cli.md index b44273ddd..341e96eaa 100644 --- a/website/content/v1.3/reference/cli.md +++ b/website/content/v1.3/reference/cli.md @@ -2201,6 +2201,7 @@ talosctl upgrade [flags] -f, --force force the upgrade (skip checks on etcd health and members, might lead to data loss) -h, --help help for upgrade -i, --image string the container image to use for performing the install + --insecure upgrade using the insecure (encrypted with no auth) maintenance service -p, --preserve preserve data -s, --stage stage the upgrade to perform it after a reboot --wait wait for the operation to complete, tracking its progress. always set to true when --debug is set