feat: allow upgrades in maintenance mode (only over SideroLink)

This implements a simple way to upgrade a Talos node running in
maintenance mode (only if Talos is already installed on disk, i.e. the
node ended up in maintenance mode because its `STATE` and `EPHEMERAL`
partitions were wiped).

Upgrade is only available over SideroLink for security reasons.

Upgrade in maintenance mode doesn't support any options, and it works
without machine configuration, so proxy environment variables are not
available, registry mirrors can't be used, and extensions are not
installed.

Fixes #6224

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2022-09-20 22:27:03 +04:00
parent 48dee48057
commit 139c62d762
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
17 changed files with 426 additions and 221 deletions

View File

@ -28,6 +28,7 @@ var upgradeCmdFlags struct {
force bool force bool
wait bool wait bool
debug bool debug bool
insecure bool
} }
// upgradeCmd represents the processes command. // upgradeCmd represents the processes command.
@ -41,6 +42,10 @@ var upgradeCmd = &cobra.Command{
upgradeCmdFlags.wait = true upgradeCmdFlags.wait = true
} }
if upgradeCmdFlags.wait && upgradeCmdFlags.insecure {
return fmt.Errorf("cannot use --wait and --insecure together")
}
if !upgradeCmdFlags.wait { if !upgradeCmdFlags.wait {
return runUpgradeNoWait() return runUpgradeNoWait()
} }
@ -58,7 +63,7 @@ var upgradeCmd = &cobra.Command{
} }
func runUpgradeNoWait() error { func runUpgradeNoWait() error {
return WithClient(func(ctx context.Context, c *client.Client) error { upgradeFn := func(ctx context.Context, c *client.Client) error {
if err := helpers.ClientVersionCheck(ctx, c); err != nil { if err := helpers.ClientVersionCheck(ctx, c); err != nil {
return err return err
} }
@ -98,7 +103,13 @@ func runUpgradeNoWait() error {
} }
return w.Flush() return w.Flush()
}) }
if upgradeCmdFlags.insecure {
return WithClientMaintenance(nil, upgradeFn)
}
return WithClient(upgradeFn)
} }
func upgradeGetActorID(ctx context.Context, c *client.Client) (string, error) { func upgradeGetActorID(ctx context.Context, c *client.Client) (string, error) {
@ -127,5 +138,6 @@ func init() {
upgradeCmd.Flags().BoolVarP(&upgradeCmdFlags.force, "force", "f", false, "force the upgrade (skip checks on etcd health and members, might lead to data loss)") upgradeCmd.Flags().BoolVarP(&upgradeCmdFlags.force, "force", "f", false, "force the upgrade (skip checks on etcd health and members, might lead to data loss)")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.wait, "wait", false, "wait for the operation to complete, tracking its progress. always set to true when --debug is set") upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.wait, "wait", false, "wait for the operation to complete, tracking its progress. always set to true when --debug is set")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.debug, "debug", false, "debug operation from kernel logs. --no-wait is set to false when this flag is set") upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.debug, "debug", false, "debug operation from kernel logs. --no-wait is set to false when this flag is set")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.insecure, "insecure", false, "upgrade using the insecure (encrypted with no auth) maintenance service")
addCommand(upgradeCmd) addCommand(upgradeCmd)
} }

View File

@ -22,11 +22,6 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
criconstants "github.com/containerd/containerd/pkg/cri/constants" criconstants "github.com/containerd/containerd/pkg/cri/constants"
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1" cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
"github.com/cosi-project/runtime/pkg/safe" "github.com/cosi-project/runtime/pkg/safe"
@ -55,7 +50,6 @@ import (
"google.golang.org/protobuf/types/known/emptypb" "google.golang.org/protobuf/types/known/emptypb"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install" installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/disk" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/disk"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
@ -68,8 +62,8 @@ import (
"github.com/talos-systems/talos/internal/pkg/containers" "github.com/talos-systems/talos/internal/pkg/containers"
taloscontainerd "github.com/talos-systems/talos/internal/pkg/containers/containerd" taloscontainerd "github.com/talos-systems/talos/internal/pkg/containers/containerd"
"github.com/talos-systems/talos/internal/pkg/containers/cri" "github.com/talos-systems/talos/internal/pkg/containers/cri"
"github.com/talos-systems/talos/internal/pkg/containers/image"
"github.com/talos-systems/talos/internal/pkg/etcd" "github.com/talos-systems/talos/internal/pkg/etcd"
"github.com/talos-systems/talos/internal/pkg/install"
"github.com/talos-systems/talos/internal/pkg/miniprocfs" "github.com/talos-systems/talos/internal/pkg/miniprocfs"
"github.com/talos-systems/talos/internal/pkg/mount" "github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/pkg/archiver" "github.com/talos-systems/talos/pkg/archiver"
@ -84,7 +78,6 @@ import (
"github.com/talos-systems/talos/pkg/machinery/api/storage" "github.com/talos-systems/talos/pkg/machinery/api/storage"
timeapi "github.com/talos-systems/talos/pkg/machinery/api/time" timeapi "github.com/talos-systems/talos/pkg/machinery/api/time"
clientconfig "github.com/talos-systems/talos/pkg/machinery/client/config" clientconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
"github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1" "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/generate" "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/generate"
machinetype "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine" machinetype "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
@ -125,7 +118,7 @@ func (s *Server) checkSupported(feature runtime.ModeCapability) error {
mode := s.Controller.Runtime().State().Platform().Mode() mode := s.Controller.Runtime().State().Platform().Mode()
if !mode.Supports(feature) { if !mode.Supports(feature) {
return fmt.Errorf("method is not supported in %s mode", mode.String()) return status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
} }
return nil return nil
@ -500,7 +493,7 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
log.Printf("validating %q", in.GetImage()) log.Printf("validating %q", in.GetImage())
if err = pullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil { if err = install.PullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil {
return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err) return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err)
} }
@ -1403,94 +1396,6 @@ func sendEmptyEvent(req *machine.EventsRequest, l machine.MachineService_EventsS
return l.Send(emptyEvent) return l.Send(emptyEvent)
} }
// pullAndValidateInstallerImage pulls the installer image ref and performs a
// smoke test on it by running `/bin/installer --help` in a throw-away container.
//
// The image is pulled via image.Pull using the registry configuration reg (the
// pull is skipped if the image is already present). Any container or snapshot
// left over under the fixed "validate" ID is removed first. An error is returned
// if the pull fails, if the stale container/snapshot can't be cleaned up, if the
// container or task can't be created/started, or if the help command exits with a
// non-zero code.
//
//nolint:gocyclo
func pullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
// Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry
containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)
// Fixed ID shared by the validation container and its snapshot.
const containerID = "validate"
client, err := containerd.New(constants.SystemContainerdAddress)
if err != nil {
return err
}
defer client.Close() //nolint:errcheck
img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled())
if err != nil {
return err
}
// See if there's previous container/snapshot to clean up
var oldcontainer containerd.Container
// A failure to load the old container is ignored; only a failed delete of an existing one is an error.
if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil {
if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil {
return fmt.Errorf("error deleting old container instance: %w", err)
}
}
// A missing snapshot is fine, any other removal error aborts the validation.
if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("error cleaning up stale snapshot: %w", err)
}
// Launch the container with a known help command for a simple check to make sure the image is valid
args := []string{
"/bin/installer",
"--help",
}
specOpts := []oci.SpecOpts{
oci.WithImageConfig(img),
oci.WithProcessArgs(args...),
}
containerOpts := []containerd.NewContainerOpts{
containerd.WithImage(img),
containerd.WithNewSnapshot(containerID, img),
containerd.WithNewSpec(specOpts...),
}
container, err := client.NewContainer(containerdctx, containerID, containerOpts...)
if err != nil {
return err
}
// Best-effort cleanup of the validation container (and its snapshot) on return.
//nolint:errcheck
defer container.Delete(containerdctx, containerd.WithSnapshotCleanup)
// The task runs with null I/O: only the exit code matters here.
task, err := container.NewTask(containerdctx, cio.NullIO)
if err != nil {
return err
}
// Best-effort cleanup of the task on return.
//nolint:errcheck
defer task.Delete(containerdctx)
// The exit channel is obtained before the task is started.
// NOTE(review): presumably so the exit event can't be missed — confirm against containerd docs.
exitStatusC, err := task.Wait(containerdctx)
if err != nil {
return err
}
if err = task.Start(containerdctx); err != nil {
return err
}
// Block until the help command finishes.
status := <-exitStatusC
code, _, err := status.Result()
if err != nil {
return err
}
if code != 0 {
return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
}
return nil
}
// Containers implements the machine.MachineServer interface. // Containers implements the machine.MachineServer interface.
func (s *Server) Containers(ctx context.Context, in *machine.ContainersRequest) (reply *machine.ContainersResponse, err error) { func (s *Server) Containers(ctx context.Context, in *machine.ContainersRequest) (reply *machine.ContainersResponse, err error) {
inspector, err := getContainerInspector(ctx, in.Namespace, in.Driver) inspector, err := getContainerInspector(ctx, in.Namespace, in.Driver)

View File

@ -28,6 +28,7 @@ import (
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
"github.com/talos-systems/talos/internal/app/machined/pkg/system" "github.com/talos-systems/talos/internal/app/machined/pkg/system"
"github.com/talos-systems/talos/internal/app/machined/pkg/system/services" "github.com/talos-systems/talos/internal/app/machined/pkg/system/services"
"github.com/talos-systems/talos/internal/app/maintenance"
"github.com/talos-systems/talos/internal/app/poweroff" "github.com/talos-systems/talos/internal/app/poweroff"
"github.com/talos-systems/talos/internal/app/trustd" "github.com/talos-systems/talos/internal/app/trustd"
"github.com/talos-systems/talos/internal/pkg/mount" "github.com/talos-systems/talos/internal/pkg/mount"
@ -197,11 +198,10 @@ func run() error {
drainer := runtime.NewDrainer() drainer := runtime.NewDrainer()
defer func() { defer func() {
c, cancel := context.WithTimeout(context.Background(), time.Second*10) drainCtx, drainCtxCancel := context.WithTimeout(context.Background(), time.Second*10)
defer drainCtxCancel()
defer cancel() if e := drainer.Drain(drainCtx); e != nil {
if e := drainer.Drain(c); e != nil {
log.Printf("WARNING: failed to drain controllers: %s", e) log.Printf("WARNING: failed to drain controllers: %s", e)
} }
}() }()
@ -227,11 +227,22 @@ func run() error {
log.Printf("controller runtime finished") log.Printf("controller runtime finished")
}() }()
// Inject controller into maintenance service.
maintenance.InjectController(c)
initializeCanceled := false
// Initialize the machine. // Initialize the machine.
if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil { if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil {
if errors.Is(err, context.Canceled) {
initializeCanceled = true
} else {
return err return err
} }
}
// If Initialize sequence was canceled, don't run any other sequence.
if !initializeCanceled {
// Perform an installation if required. // Perform an installation if required.
if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil { if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil {
return err return err
@ -247,6 +258,7 @@ func run() error {
if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) { if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) {
return err return err
} }
}
// Watch and handle runtime events. // Watch and handle runtime events.
//nolint:errcheck //nolint:errcheck

View File

@ -353,7 +353,7 @@ func (ctrl *MachineStatusController) watchEvents() {
// install sequence is run always, even if the machine is already installed, so we'll catch it by phase name // install sequence is run always, even if the machine is already installed, so we'll catch it by phase name
case v1alpha1runtime.SequenceShutdown.String(): case v1alpha1runtime.SequenceShutdown.String():
newStage = runtime.MachineStageShuttingDown newStage = runtime.MachineStageShuttingDown
case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(): case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(), v1alpha1runtime.SequenceMaintenanceUpgrade.String():
newStage = runtime.MachineStageUpgrading newStage = runtime.MachineStageUpgrading
case v1alpha1runtime.SequenceReset.String(): case v1alpha1runtime.SequenceReset.String():
newStage = runtime.MachineStageResetting newStage = runtime.MachineStageResetting
@ -375,7 +375,9 @@ func (ctrl *MachineStatusController) watchEvents() {
case currentSequence == v1alpha1runtime.SequenceInstall.String() && event.Phase == "install": case currentSequence == v1alpha1runtime.SequenceInstall.String() && event.Phase == "install":
newStage = runtime.MachineStageInstalling newStage = runtime.MachineStageInstalling
case (currentSequence == v1alpha1runtime.SequenceInstall.String() || case (currentSequence == v1alpha1runtime.SequenceInstall.String() ||
currentSequence == v1alpha1runtime.SequenceUpgrade.String()) && event.Phase == "kexec": currentSequence == v1alpha1runtime.SequenceUpgrade.String() ||
currentSequence == v1alpha1runtime.SequenceStageUpgrade.String() ||
currentSequence == v1alpha1runtime.SequenceMaintenanceUpgrade.String()) && event.Phase == "kexec":
newStage = runtime.MachineStageRebooting newStage = runtime.MachineStageRebooting
} }
} }

View File

@ -28,6 +28,8 @@ const (
SequenceUpgrade SequenceUpgrade
// SequenceStageUpgrade is the stage upgrade sequence. // SequenceStageUpgrade is the stage upgrade sequence.
SequenceStageUpgrade SequenceStageUpgrade
// SequenceMaintenanceUpgrade is the upgrade sequence in maintenance mode.
SequenceMaintenanceUpgrade
// SequenceReset is the reset sequence. // SequenceReset is the reset sequence.
SequenceReset SequenceReset
// SequenceReboot is the reboot sequence. // SequenceReboot is the reboot sequence.
@ -41,12 +43,16 @@ const (
shutdown = "shutdown" shutdown = "shutdown"
upgrade = "upgrade" upgrade = "upgrade"
stageUpgrade = "stageUpgrade" stageUpgrade = "stageUpgrade"
maintenanceUpgrade = "maintenanceUpgrade"
reset = "reset" reset = "reset"
reboot = "reboot" reboot = "reboot"
noop = "noop" noop = "noop"
) )
var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
SequenceInitialize: {
SequenceMaintenanceUpgrade: {},
},
SequenceBoot: { SequenceBoot: {
SequenceReboot: {}, SequenceReboot: {},
SequenceReset: {}, SequenceReset: {},
@ -62,7 +68,7 @@ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
// String returns the string representation of a `Sequence`. // String returns the string representation of a `Sequence`.
func (s Sequence) String() string { func (s Sequence) String() string {
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, reset, reboot}[s] return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot}[s]
} }
// CanTakeOver defines sequences priority. // CanTakeOver defines sequences priority.
@ -141,6 +147,7 @@ type Sequencer interface {
Shutdown(Runtime, *machine.ShutdownRequest) []Phase Shutdown(Runtime, *machine.ShutdownRequest) []Phase
StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase
Upgrade(Runtime, *machine.UpgradeRequest) []Phase Upgrade(Runtime, *machine.UpgradeRequest) []Phase
MaintenanceUpgrade(Runtime, *machine.UpgradeRequest) []Phase
} }
// EventSequenceStart represents the sequence start event. // EventSequenceStart represents the sequence start event.

View File

@ -336,12 +336,8 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
case runtime.SequenceInstall: case runtime.SequenceInstall:
phases = c.s.Install(c.r) phases = c.s.Install(c.r)
case runtime.SequenceShutdown: case runtime.SequenceShutdown:
var ( in, ok := data.(*machine.ShutdownRequest)
in *machine.ShutdownRequest if !ok {
ok bool
)
if in, ok = data.(*machine.ShutdownRequest); !ok {
return nil, runtime.ErrInvalidSequenceData return nil, runtime.ErrInvalidSequenceData
} }
@ -349,34 +345,29 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
case runtime.SequenceReboot: case runtime.SequenceReboot:
phases = c.s.Reboot(c.r) phases = c.s.Reboot(c.r)
case runtime.SequenceUpgrade: case runtime.SequenceUpgrade:
var ( in, ok := data.(*machine.UpgradeRequest)
in *machine.UpgradeRequest if !ok {
ok bool
)
if in, ok = data.(*machine.UpgradeRequest); !ok {
return nil, runtime.ErrInvalidSequenceData return nil, runtime.ErrInvalidSequenceData
} }
phases = c.s.Upgrade(c.r, in) phases = c.s.Upgrade(c.r, in)
case runtime.SequenceStageUpgrade: case runtime.SequenceStageUpgrade:
var ( in, ok := data.(*machine.UpgradeRequest)
in *machine.UpgradeRequest if !ok {
ok bool
)
if in, ok = data.(*machine.UpgradeRequest); !ok {
return nil, runtime.ErrInvalidSequenceData return nil, runtime.ErrInvalidSequenceData
} }
phases = c.s.StageUpgrade(c.r, in) phases = c.s.StageUpgrade(c.r, in)
case runtime.SequenceReset: case runtime.SequenceMaintenanceUpgrade:
var ( in, ok := data.(*machine.UpgradeRequest)
in runtime.ResetOptions if !ok {
ok bool return nil, runtime.ErrInvalidSequenceData
) }
if in, ok = data.(runtime.ResetOptions); !ok { phases = c.s.MaintenanceUpgrade(c.r, in)
case runtime.SequenceReset:
in, ok := data.(runtime.ResetOptions)
if !ok {
return nil, runtime.ErrInvalidSequenceData return nil, runtime.ErrInvalidSequenceData
} }

View File

@ -59,6 +59,10 @@ func (m *mockSequencer) StageUpgrade(r runtime.Runtime, req *machine.UpgradeRequ
return m.phases[runtime.SequenceStageUpgrade] return m.phases[runtime.SequenceStageUpgrade]
} }
// MaintenanceUpgrade returns the phases registered in the mock under
// runtime.SequenceMaintenanceUpgrade; the runtime and request arguments are ignored.
func (m *mockSequencer) MaintenanceUpgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
	registered := m.phases[runtime.SequenceMaintenanceUpgrade]

	return registered
}
func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase { func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
return m.phases[runtime.SequenceUpgrade] return m.phases[runtime.SequenceUpgrade]
} }

View File

@ -375,6 +375,44 @@ func (*Sequencer) StageUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest)
return phases return phases
} }
// MaintenanceUpgrade builds the phase list for an upgrade performed while the
// node is in maintenance mode.
//
// In container mode there is nothing to do and nil is returned. In every other
// mode the phases are, in order: start containerd, verify disk availability,
// run the upgrade, mount the boot partition, prepare kexec, unmount the boot
// partition, stop all services, and reboot. The upgrade request itself is not
// inspected here.
func (*Sequencer) MaintenanceUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
	if r.State().Platform().Mode() == runtime.ModeContainer {
		return nil
	}

	steps := PhaseList{}

	steps = steps.Append("containerd", StartContainerd)
	steps = steps.Append("verifyDisk", VerifyDiskAvailability)
	steps = steps.Append("upgrade", Upgrade)
	steps = steps.Append("mountBoot", MountBootPartition)
	steps = steps.Append("kexec", KexecPrepare)
	steps = steps.Append("unmountBoot", UnmountBootPartition)
	steps = steps.Append("stopEverything", StopAllServices)
	steps = steps.Append("reboot", Reboot)

	return steps
}
// Upgrade is the upgrade sequence. // Upgrade is the upgrade sequence.
func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase { func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
phases := PhaseList{} phases := PhaseList{}

View File

@ -43,7 +43,6 @@ import (
"kernel.org/pub/linux/libs/security/libcap/cap" "kernel.org/pub/linux/libs/security/libcap/cap"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install" installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/adv" "github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/adv"
@ -56,6 +55,7 @@ import (
"github.com/talos-systems/talos/internal/app/maintenance" "github.com/talos-systems/talos/internal/app/maintenance"
"github.com/talos-systems/talos/internal/pkg/cri" "github.com/talos-systems/talos/internal/pkg/cri"
"github.com/talos-systems/talos/internal/pkg/etcd" "github.com/talos-systems/talos/internal/pkg/etcd"
"github.com/talos-systems/talos/internal/pkg/install"
"github.com/talos-systems/talos/internal/pkg/mount" "github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/internal/pkg/partition" "github.com/talos-systems/talos/internal/pkg/partition"
"github.com/talos-systems/talos/pkg/conditions" "github.com/talos-systems/talos/pkg/conditions"
@ -625,7 +625,7 @@ func receiveConfigViaMaintenanceService(ctx context.Context, logger *log.Logger,
Task: "runningMaintenance", Task: "runningMaintenance",
}) })
cfgBytes, err := maintenance.Run(ctx, logger, r) cfgBytes, err := maintenance.Run(ctx, logger)
if err != nil { if err != nil {
return nil, fmt.Errorf("maintenance service failed: %w", err) return nil, fmt.Errorf("maintenance service failed: %w", err)
} }

View File

@ -94,12 +94,21 @@ func (c *Containerd) Runner(r runtime.Runtime) (runner.Runner, error) {
} }
env := []string{} env := []string{}
if r.Config() != nil {
for key, val := range r.Config().Machine().Env() { for key, val := range r.Config().Machine().Env() {
env = append(env, fmt.Sprintf("%s=%s", key, val)) env = append(env, fmt.Sprintf("%s=%s", key, val))
} }
}
debug := false
if r.Config() != nil {
debug = r.Config().Debug()
}
return restart.New(process.NewRunner( return restart.New(process.NewRunner(
r.Config().Debug(), debug,
args, args,
runner.WithLoggingManager(r.Logging()), runner.WithLoggingManager(r.Logging()),
runner.WithEnv(env), runner.WithEnv(env),

View File

@ -32,19 +32,30 @@ import (
"github.com/talos-systems/talos/pkg/machinery/resources/network" "github.com/talos-systems/talos/pkg/machinery/resources/network"
) )
// ctrl is the package-level controller used by the maintenance service.
// It is nil until InjectController is called.
var ctrl runtime.Controller
// InjectController is used to pass the controller into the maintenance service.
//
// It stores c in the package-level ctrl variable, replacing any previous value.
// NOTE(review): the assignment is not synchronized; presumably this is called
// once during startup before the maintenance service runs — confirm with the caller.
func InjectController(c runtime.Controller) {
ctrl = c
}
// Run executes the configuration receiver, returning any configuration it receives. // Run executes the configuration receiver, returning any configuration it receives.
// //
//nolint:gocyclo //nolint:gocyclo
func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) { func Run(ctx context.Context, logger *log.Logger) ([]byte, error) {
if ctrl == nil {
return nil, fmt.Errorf("controller is not injected")
}
logger.Println("waiting for network address to be ready") logger.Println("waiting for network address to be ready")
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil { if err := network.NewReadyCondition(ctrl.Runtime().State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
return nil, fmt.Errorf("error waiting for the network to be ready: %w", err) return nil, fmt.Errorf("error waiting for the network to be ready: %w", err)
} }
var sideroLinkAddress netip.Addr var sideroLinkAddress netip.Addr
currentAddresses, err := r.State().V1Alpha2().Resources().WatchFor(ctx, currentAddresses, err := ctrl.Runtime().State().V1Alpha2().Resources().WatchFor(ctx,
resource.NewMetadata(network.NamespaceName, network.NodeAddressType, network.NodeAddressCurrentID, resource.VersionUndefined), resource.NewMetadata(network.NamespaceName, network.NodeAddressType, network.NodeAddressCurrentID, resource.VersionUndefined),
sideroLinkAddressFinder(&sideroLinkAddress, logger), sideroLinkAddressFinder(&sideroLinkAddress, logger),
) )
@ -55,7 +66,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs() ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs()
// hostname might not be available yet, so use it only if it is available // hostname might not be available yet, so use it only if it is available
hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined)) hostnameStatus, err := ctrl.Runtime().State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
if err != nil && !state.IsNotFoundError(err) { if err != nil && !state.IsNotFoundError(err) {
return nil, fmt.Errorf("error getting node hostname: %w", err) return nil, fmt.Errorf("error getting node hostname: %w", err)
} }
@ -83,7 +94,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
cfgCh := make(chan []byte) cfgCh := make(chan []byte)
s := server.New(r, logger, cfgCh) s := server.New(ctrl, logger, cfgCh)
injector := &authz.Injector{ injector := &authz.Injector{
Mode: authz.ReadOnly, Mode: authz.ReadOnly,
@ -152,7 +163,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
return cfg, err return cfg, err
case <-ctx.Done(): case <-ctx.Done():
return nil, fmt.Errorf("context is done") return nil, ctx.Err()
} }
} }

View File

@ -0,0 +1,50 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package server
import (
"context"
"net"
"net/netip"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status"
"github.com/talos-systems/talos/pkg/machinery/resources/network"
)
// verifyPeer reports whether the gRPC peer attached to ctx satisfies condition.
//
// It returns false when ctx carries no peer information, when the peer is not
// connected over "tcp", or when the peer address can't be split into host/port
// and parsed as an IP address. Otherwise the parsed address is handed to
// condition and its verdict is returned.
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
	p, found := peer.FromContext(ctx)
	if !found || p.Addr.Network() != "tcp" {
		return false
	}

	host, _, splitErr := net.SplitHostPort(p.Addr.String())
	if splitErr != nil {
		return false
	}

	peerAddr, parseErr := netip.ParseAddr(host)

	return parseErr == nil && condition(peerAddr)
}
func assertPeerSideroLink(ctx context.Context) error {
if !verifyPeer(ctx, func(addr netip.Addr) bool {
return network.IsULA(addr, network.ULASideroLink)
}) {
return status.Error(codes.Unimplemented, "API is not implemented in maintenance mode")
}
return nil
}

View File

@ -8,16 +8,14 @@ import (
"context" "context"
"fmt" "fmt"
"log" "log"
"net"
"net/netip"
"strings" "strings"
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1" cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
"github.com/cosi-project/runtime/pkg/state" "github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/protobuf/server" "github.com/cosi-project/runtime/pkg/state/protobuf/server"
"github.com/google/uuid"
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/codes" "google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb" "google.golang.org/protobuf/types/known/emptypb"
@ -30,7 +28,6 @@ import (
"github.com/talos-systems/talos/pkg/machinery/api/storage" "github.com/talos-systems/talos/pkg/machinery/api/storage"
"github.com/talos-systems/talos/pkg/machinery/config/configloader" "github.com/talos-systems/talos/pkg/machinery/config/configloader"
v1alpha1machine "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine" v1alpha1machine "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/talos-systems/talos/pkg/machinery/resources/network"
"github.com/talos-systems/talos/pkg/version" "github.com/talos-systems/talos/pkg/version"
) )
@ -38,16 +35,16 @@ import (
type Server struct { type Server struct {
machine.UnimplementedMachineServiceServer machine.UnimplementedMachineServiceServer
runtime runtime.Runtime controller runtime.Controller
logger *log.Logger logger *log.Logger
cfgCh chan []byte cfgCh chan<- []byte
server *grpc.Server server *grpc.Server
} }
// New initializes and returns a `Server`. // New initializes and returns a `Server`.
func New(r runtime.Runtime, logger *log.Logger, cfgCh chan []byte) *Server { func New(c runtime.Controller, logger *log.Logger, cfgCh chan<- []byte) *Server {
return &Server{ return &Server{
runtime: r, controller: c,
logger: logger, logger: logger,
cfgCh: cfgCh, cfgCh: cfgCh,
} }
@ -58,7 +55,7 @@ func (s *Server) Register(obj *grpc.Server) {
s.server = obj s.server = obj
// wrap resources with access filter // wrap resources with access filter
resourceState := s.runtime.State().V1Alpha2().Resources() resourceState := s.controller.Runtime().State().V1Alpha2().Resources()
resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState))) resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState)))
storage.RegisterStorageServiceServer(obj, &storaged.Server{}) storage.RegisterStorageServiceServer(obj, &storaged.Server{})
@ -86,7 +83,7 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig
return nil, fmt.Errorf("failed to parse config: %w", err) return nil, fmt.Errorf("failed to parse config: %w", err)
} }
warnings, err := cfgProvider.Validate(s.runtime.State().Platform().Mode()) warnings, err := cfgProvider.Validate(s.controller.Runtime().State().Platform().Mode())
if err != nil { if err != nil {
return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err) return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err)
} }
@ -131,43 +128,18 @@ func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.Ge
return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode") return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode")
} }
// verifyPeer reports whether the peer recorded in ctx is connected over TCP
// from an IP address that satisfies condition.
//
// It returns false when ctx carries no peer information, when the peer's
// network is not "tcp", or when the peer address cannot be split into
// host/port and parsed as an IP address.
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
	p, found := peer.FromContext(ctx)
	if !found || p.Addr.Network() != "tcp" {
		return false
	}

	host, _, splitErr := net.SplitHostPort(p.Addr.String())
	if splitErr != nil {
		return false
	}

	if parsed, parseErr := netip.ParseAddr(host); parseErr == nil {
		return condition(parsed)
	}

	return false
}
// Version implements the machine.MachineServer interface. // Version implements the machine.MachineServer interface.
func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) { func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) {
if !verifyPeer(ctx, func(addr netip.Addr) bool { if err := assertPeerSideroLink(ctx); err != nil {
return network.IsULA(addr, network.ULASideroLink) return nil, err
}) {
return nil, status.Error(codes.Unimplemented, "Version API is not implemented in maintenance mode")
} }
var platform *machine.PlatformInfo var platform *machine.PlatformInfo
if s.runtime.State().Platform() != nil { if s.controller.Runtime().State().Platform() != nil {
platform = &machine.PlatformInfo{ platform = &machine.PlatformInfo{
Name: s.runtime.State().Platform().Name(), Name: s.controller.Runtime().State().Platform().Name(),
Mode: s.runtime.State().Platform().Mode().String(), Mode: s.controller.Runtime().State().Platform().Mode().String(),
} }
} }
@ -180,3 +152,52 @@ func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.Versi
}, },
}, nil }, nil
} }
// Upgrade starts an upgrade of the node while it is in maintenance mode.
//
// The request is rejected when the SideroLink peer assertion fails, when the
// machine state reports no disk ("Talos is not installed"), when the platform
// mode does not support upgrades, or when any of the preserve, stage or force
// options is set. Otherwise the maintenance upgrade sequence is launched in a
// background goroutine and the reply acknowledges the request together with
// the generated actor ID.
//
//nolint:gocyclo,cyclop
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
	if err = assertPeerSideroLink(ctx); err != nil {
		return nil, err
	}

	st := s.controller.Runtime().State()

	if st.Machine().Disk() == nil {
		return nil, status.Errorf(codes.FailedPrecondition, "Talos is not installed")
	}

	mode := st.Platform().Mode()
	if !mode.Supports(runtime.Upgrade) {
		return nil, status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
	}

	// maintenance mode upgrades accept no options at all
	switch {
	case in.GetPreserve(), in.GetStage(), in.GetForce():
		return nil, status.Errorf(codes.Unimplemented, "upgrade --preserve, --stage, and --force are not supported in maintenance mode")
	}

	log.Printf("upgrade request received: %q", in.GetImage())

	actorID := uuid.New().String()

	// the sequence runs on a fresh background context, not the request ctx
	runCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID)

	go func() {
		runErr := s.controller.Run(runCtx, runtime.SequenceMaintenanceUpgrade, in)
		if runErr == nil || runtime.IsRebootError(runErr) {
			// reboot errors are not logged as upgrade failures
			return
		}

		log.Println("upgrade failed:", runErr)
	}()

	return &machine.UpgradeResponse{
		Messages: []*machine.Upgrade{
			{
				Ack:     "Upgrade request received",
				ActorId: actorID,
			},
		},
	}, nil
}

View File

@ -8,6 +8,7 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"io"
"log" "log"
"os" "os"
"strconv" "strconv"
@ -29,6 +30,7 @@ import (
"github.com/talos-systems/talos/internal/pkg/extensions" "github.com/talos-systems/talos/internal/pkg/extensions"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine" machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/config" "github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
"github.com/talos-systems/talos/pkg/machinery/constants" "github.com/talos-systems/talos/pkg/machinery/constants"
) )
@ -46,9 +48,16 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
} }
} }
configBytes, err := cfg.Bytes() var (
if err != nil { registriesConfig config.Registries
return err extensionsConfig []config.Extension
)
if cfg != nil {
registriesConfig = cfg.Machine().Registries()
extensionsConfig = cfg.Machine().Install().Extensions()
} else {
registriesConfig = &v1alpha1.RegistriesConfig{}
} }
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
@ -77,7 +86,7 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
if img == nil || err != nil && errdefs.IsNotFound(err) { if img == nil || err != nil && errdefs.IsNotFound(err) {
log.Printf("pulling %q", ref) log.Printf("pulling %q", ref)
img, err = image.Pull(ctx, cfg.Machine().Registries(), client, ref) img, err = image.Pull(ctx, registriesConfig, client, ref)
} }
if err != nil { if err != nil {
@ -89,9 +98,11 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
return err return err
} }
if err = puller.PullAndMount(ctx, cfg.Machine().Registries(), cfg.Machine().Install().Extensions()); err != nil { if extensionsConfig != nil {
if err = puller.PullAndMount(ctx, registriesConfig, extensionsConfig); err != nil {
return err return err
} }
}
defer func() { defer func() {
if err = puller.Cleanup(ctx); err != nil { if err = puller.Cleanup(ctx); err != nil {
@ -205,19 +216,35 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
w := &kmsg.Writer{KmsgWriter: f} w := &kmsg.Writer{KmsgWriter: f}
configR := &containerdrunner.StdinCloser{ var r interface {
io.Reader
WaitAndClose(context.Context, containerd.Task)
}
if cfg != nil {
var configBytes []byte
configBytes, err = cfg.Bytes()
if err != nil {
return err
}
r = &containerdrunner.StdinCloser{
Stdin: bytes.NewReader(configBytes), Stdin: bytes.NewReader(configBytes),
Closer: make(chan struct{}), Closer: make(chan struct{}),
} }
}
creator := cio.NewCreator(cio.WithStreams(configR, w, w)) creator := cio.NewCreator(cio.WithStreams(r, w, w))
t, err := container.NewTask(ctx, creator) t, err := container.NewTask(ctx, creator)
if err != nil { if err != nil {
return err return err
} }
go configR.WaitAndClose(ctx, t) if r != nil {
go r.WaitAndClose(ctx, t)
}
defer t.Delete(ctx) //nolint:errcheck defer t.Delete(ctx) //nolint:errcheck
@ -242,10 +269,15 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
// OptionsFromUpgradeRequest builds installer options from upgrade request. // OptionsFromUpgradeRequest builds installer options from upgrade request.
func OptionsFromUpgradeRequest(r runtime.Runtime, in *machineapi.UpgradeRequest) []Option { func OptionsFromUpgradeRequest(r runtime.Runtime, in *machineapi.UpgradeRequest) []Option {
return []Option{ opts := []Option{
WithPull(false), WithPull(false),
WithUpgrade(true), WithUpgrade(true),
WithForce(!in.GetPreserve()), WithForce(!in.GetPreserve()),
WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()),
} }
if r.Config() != nil {
opts = append(opts, WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()))
}
return opts
} }

View File

@ -0,0 +1,110 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package install
import (
"context"
"fmt"
"github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/talos-systems/talos/internal/pkg/containers/image"
"github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/constants"
)
// PullAndValidateInstallerImage pulls the installer image ref (skipping the pull
// if it is already present) and verifies it is runnable by executing
// `/bin/installer --help` in a throwaway container named "validate".
//
// It returns an error when the image cannot be pulled, when a leftover
// container or snapshot from a previous run cannot be removed, when the
// container or task cannot be created or started, or when the help command
// exits with a non-zero code.
//
//nolint:gocyclo
func PullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
	const containerID = "validate"

	// Pull the image up front so a missing upstream image fails early.
	nsCtx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)

	client, err := containerd.New(constants.SystemContainerdAddress)
	if err != nil {
		return err
	}

	defer client.Close() //nolint:errcheck

	img, err := image.Pull(nsCtx, reg, client, ref, image.WithSkipIfAlreadyPulled())
	if err != nil {
		return err
	}

	// Clean up a container/snapshot left behind by a previous validation run.
	if stale, loadErr := client.LoadContainer(nsCtx, containerID); loadErr == nil {
		if err = stale.Delete(nsCtx, containerd.WithSnapshotCleanup); err != nil {
			return fmt.Errorf("error deleting old container instance: %w", err)
		}
	}

	if err = client.SnapshotService("").Remove(nsCtx, containerID); err != nil && !errdefs.IsNotFound(err) {
		return fmt.Errorf("error cleaning up stale snapshot: %w", err)
	}

	// Run a known help command as a simple check that the image is valid.
	container, err := client.NewContainer(
		nsCtx,
		containerID,
		containerd.WithImage(img),
		containerd.WithNewSnapshot(containerID, img),
		containerd.WithNewSpec(
			oci.WithImageConfig(img),
			oci.WithProcessArgs("/bin/installer", "--help"),
		),
	)
	if err != nil {
		return err
	}

	//nolint:errcheck
	defer container.Delete(nsCtx, containerd.WithSnapshotCleanup)

	task, err := container.NewTask(nsCtx, cio.NullIO)
	if err != nil {
		return err
	}

	//nolint:errcheck
	defer task.Delete(nsCtx)

	// Subscribe to the exit status before starting the task.
	exitCh, err := task.Wait(nsCtx)
	if err != nil {
		return err
	}

	if err = task.Start(nsCtx); err != nil {
		return err
	}

	exitStatus := <-exitCh

	exitCode, _, err := exitStatus.Result()
	if err != nil {
		return err
	}

	if exitCode != 0 {
		return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
	}

	return nil
}

View File

@ -2201,6 +2201,7 @@ talosctl upgrade [flags]
-f, --force force the upgrade (skip checks on etcd health and members, might lead to data loss) -f, --force force the upgrade (skip checks on etcd health and members, might lead to data loss)
-h, --help help for upgrade -h, --help help for upgrade
-i, --image string the container image to use for performing the install -i, --image string the container image to use for performing the install
--insecure upgrade using the insecure (encrypted with no auth) maintenance service
-p, --preserve preserve data -p, --preserve preserve data
-s, --stage stage the upgrade to perform it after a reboot -s, --stage stage the upgrade to perform it after a reboot
--wait wait for the operation to complete, tracking its progress. always set to true when --debug is set --wait wait for the operation to complete, tracking its progress. always set to true when --debug is set