Andrew Rynhard 49307d554d refactor: improve machined
This is a rewrite of machined. It addresses some of the limitations and
complexity in the implementation. This introduces the idea of a
controller. A controller is responsible for managing the runtime, the
sequencer, and a new state type introduced in this PR.

A few highlights are:

- no more event bus
- functional approach to tasks (no more types defined for each task)
  - the task function definition now offers a lot more context, like
    access to raw API requests, the current sequence, a logger, the new
    state interface, and the runtime interface.
- no more panics to handle reboots
- additional initialize and reboot sequences
- graceful gRPC server shutdown on critical errors
- config is now stored at install time to avoid having to download it at
  install time and at boot time
- upgrades now use the local config instead of downloading it
- the upgrade API's preserve option takes precedence over the config's
  install force option

Additionally, this pulls various packages in under machined to make the
code easier to navigate.

Signed-off-by: Andrew Rynhard <andrew@andrewrynhard.com>
2020-04-28 08:20:55 -07:00

122 lines
3.1 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package check provides a set of checks to verify cluster readiness.
package check
import (
"context"
"fmt"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/pkg/client"
)
// ServiceStateAssertion checks whether service reached some specified state.
//
// The check is performed against the single "init" node of the cluster: the
// most recent event returned for the service must carry one of the accepted
// states.
//
// An error is returned when the client cannot be obtained, when the cluster
// does not report exactly one init node, when the service info request fails,
// when the response contains no entries or an entry has no events, or when the
// latest event is in a state other than the accepted ones.
//
//nolint: gocyclo
func ServiceStateAssertion(ctx context.Context, cluster ClusterInfo, service string, states ...string) error {
	cli, err := cluster.Client()
	if err != nil {
		return err
	}

	// perform check against "init" node
	initNodes := cluster.NodesByType(runtime.MachineTypeInit)
	if len(initNodes) != 1 {
		return fmt.Errorf("init node not found, len(initNodes) = %d", len(initNodes))
	}

	nodeCtx := client.WithNodes(ctx, initNodes[0])

	servicesInfo, err := cli.ServiceInfo(nodeCtx, service)
	if err != nil {
		return err
	}

	// no entries in the response: report the service as not found
	if len(servicesInfo) == 0 {
		return fmt.Errorf("service %q not found", service)
	}

	acceptedStates := make(map[string]struct{}, len(states))
	for _, state := range states {
		acceptedStates[state] = struct{}{}
	}

	for _, serviceInfo := range servicesInfo {
		// Getters are used instead of direct field access so that a response
		// lacking the events message is reported as an error rather than
		// dereferencing nil.
		// NOTE(review): assumes generated protobuf accessors — confirm.
		events := serviceInfo.Service.GetEvents().GetEvents()
		if len(events) == 0 {
			return fmt.Errorf("no events recorded yet for service %q", service)
		}

		// only the most recent event is compared against the accepted states
		lastEvent := events[len(events)-1]
		if _, ok := acceptedStates[lastEvent.GetState()]; !ok {
			return fmt.Errorf("service %q not in expected state %q: current state [%s] %s", service, states, lastEvent.GetState(), lastEvent.GetMsg())
		}
	}

	return nil
}
// ServiceHealthAssertion checks whether service is running and healthy on the
// selected nodes.
//
// Nodes are selected by type when the applied options list any types;
// otherwise every node reported by the cluster is checked. The response must
// contain exactly one entry per selected node, and for each entry the most
// recent event must be in the "Running" state and the health flag must be set.
//
// An error is returned when an option setter fails, when the client cannot be
// obtained, when the service info request fails, when the number of response
// entries differs from the number of selected nodes, when an entry has no
// events, or when the service is not running or not healthy.
//
//nolint: gocyclo
func ServiceHealthAssertion(ctx context.Context, cluster ClusterInfo, service string, setters ...Option) error {
	opts := DefaultOptions()

	for _, setter := range setters {
		if err := setter(opts); err != nil {
			return err
		}
	}

	cli, err := cluster.Client()
	if err != nil {
		return err
	}

	var nodes []string

	if len(opts.Types) > 0 {
		for _, t := range opts.Types {
			nodes = append(nodes, cluster.NodesByType(t)...)
		}
	} else {
		nodes = cluster.Nodes()
	}

	count := len(nodes)
	nodesCtx := client.WithNodes(ctx, nodes...)

	servicesInfo, err := cli.ServiceInfo(nodesCtx, service)
	if err != nil {
		return err
	}

	// every selected node is expected to contribute exactly one entry
	if len(servicesInfo) != count {
		return fmt.Errorf("expected a response with %d node(s), got %d", count, len(servicesInfo))
	}

	for _, serviceInfo := range servicesInfo {
		// Getters are used for events as well as for health, so that a
		// response lacking the events message is reported as an error rather
		// than dereferencing nil.
		events := serviceInfo.Service.GetEvents().GetEvents()
		if len(events) == 0 {
			return fmt.Errorf("no events recorded yet for service %q", service)
		}

		// only the most recent event decides the current state
		lastEvent := events[len(events)-1]
		if lastEvent.GetState() != "Running" {
			return fmt.Errorf("service %q not in expected state %q: current state [%s] %s", service, "Running", lastEvent.GetState(), lastEvent.GetMsg())
		}

		if !serviceInfo.Service.GetHealth().GetHealthy() {
			return fmt.Errorf("service is not healthy: %s", service)
		}
	}

	return nil
}