feat: provide random node identity

Fixes #4137

Node identity is established when `STATE` partition is mounted, and
cached there. Node identity will be used for the cluster discovery
process to identify each node of the cluster.

Random 32 bytes encoded via base62 are used as node identity.

`base62` uses only URL-safe characters which might save us some trouble
later.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-08-25 23:46:28 +03:00
parent 032e7c6b86
commit 7f22879af0
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
12 changed files with 521 additions and 0 deletions

1
go.mod
View File

@ -52,6 +52,7 @@ require (
github.com/imdario/mergo v0.3.12 // indirect github.com/imdario/mergo v0.3.12 // indirect
github.com/insomniacslk/dhcp v0.0.0-20210817203519-d82598001386 github.com/insomniacslk/dhcp v0.0.0-20210817203519-d82598001386
github.com/jsimonetti/rtnetlink v0.0.0-20210614053835-9c52e516c709 github.com/jsimonetti/rtnetlink v0.0.0-20210614053835-9c52e516c709
github.com/jxskiss/base62 v0.0.0-20191017122030-4f11678b909b
github.com/mattn/go-isatty v0.0.13 github.com/mattn/go-isatty v0.0.13
github.com/mdlayher/arp v0.0.0-20191213142603-f72070a231fc github.com/mdlayher/arp v0.0.0-20191213142603-f72070a231fc
github.com/mdlayher/ethtool v0.0.0-20210210192532-2b88debcdd43 github.com/mdlayher/ethtool v0.0.0-20210210192532-2b88debcdd43

2
go.sum
View File

@ -761,6 +761,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/jxskiss/base62 v0.0.0-20191017122030-4f11678b909b h1:XUr8tvMEILhphQPp3TFcIudb5KTOzFeD0pJyDn5+5QI=
github.com/jxskiss/base62 v0.0.0-20191017122030-4f11678b909b/go.mod h1:a5Mn24iYVJRUQSkFupGByqykzD+k+wFI8J91zGHuPf8=
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4= github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=

View File

@ -0,0 +1,6 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package cluster provides controllers which manage Talos cluster resources.
package cluster

View File

@ -0,0 +1,159 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster
import (
"context"
"fmt"
"os"
"path/filepath"
"reflect"
"github.com/AlekSi/pointer"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"go.uber.org/zap"
"gopkg.in/yaml.v3"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/resources/cluster"
runtimeres "github.com/talos-systems/talos/pkg/resources/runtime"
"github.com/talos-systems/talos/pkg/resources/v1alpha1"
)
// NodeIdentityController manages runtime.Identity caching identity in the STATE.
type NodeIdentityController struct {
V1Alpha1Mode runtime.Mode
StatePath string
identityEstablished bool
}
// Name implements controller.Controller interface.
func (ctrl *NodeIdentityController) Name() string {
return "cluster.NodeIdentityController"
}
// Inputs implements controller.Controller interface.
func (ctrl *NodeIdentityController) Inputs() []controller.Input {
return []controller.Input{
{
Namespace: v1alpha1.NamespaceName,
Type: runtimeres.MountStatusType,
ID: pointer.ToString(constants.StatePartitionLabel),
Kind: controller.InputWeak,
},
}
}
// Outputs implements controller.Controller interface.
func (ctrl *NodeIdentityController) Outputs() []controller.Output {
return []controller.Output{
{
Type: cluster.IdentityType,
Kind: controller.OutputShared,
},
}
}
func loadOrNewFromState(statePath, path string, empty interface{}, generate func(interface{}) error) error {
fullPath := filepath.Join(statePath, path)
f, err := os.OpenFile(fullPath, os.O_RDONLY, 0)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("error reading state file: %w", err)
}
// file doesn't exist yet, generate new value and save it
if f == nil {
if err = generate(empty); err != nil {
return err
}
f, err = os.OpenFile(fullPath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o600)
if err != nil {
return fmt.Errorf("error creating state file: %w", err)
}
defer f.Close() //nolint:errcheck
encoder := yaml.NewEncoder(f)
if err = encoder.Encode(empty); err != nil {
return fmt.Errorf("error marshaling: %w", err)
}
if err = encoder.Close(); err != nil {
return err
}
return f.Close()
}
// read existing cached value
defer f.Close() //nolint:errcheck
if err = yaml.NewDecoder(f).Decode(empty); err != nil {
return fmt.Errorf("error unmarshaling: %w", err)
}
if reflect.ValueOf(empty).Elem().IsZero() {
return fmt.Errorf("value is still zero after unmarshaling")
}
return f.Close()
}
// Run implements controller.Controller interface.
//
//nolint:gocyclo
func (ctrl *NodeIdentityController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
if ctrl.StatePath == "" {
ctrl.StatePath = constants.StateMountPoint
}
for {
select {
case <-ctx.Done():
return nil
case <-r.EventCh():
}
if _, err := r.Get(ctx, resource.NewMetadata(v1alpha1.NamespaceName, runtimeres.MountStatusType, constants.StatePartitionLabel, resource.VersionUndefined)); err != nil {
if state.IsNotFoundError(err) {
// in container mode STATE is always mounted
if ctrl.V1Alpha1Mode != runtime.ModeContainer {
// wait for the STATE to be mounted
continue
}
} else {
return fmt.Errorf("error reading mount status: %w", err)
}
}
var localIdentity cluster.IdentitySpec
if err := loadOrNewFromState(ctrl.StatePath, constants.NodeIdentityFilename, &localIdentity, func(v interface{}) error {
return v.(*cluster.IdentitySpec).Generate()
}); err != nil {
return fmt.Errorf("error caching node identity: %w", err)
}
if err := r.Modify(ctx, cluster.NewIdentity(cluster.NamespaceName, cluster.LocalIdentity), func(r resource.Resource) error {
*r.(*cluster.Identity).TypedSpec() = localIdentity
return nil
}); err != nil {
return fmt.Errorf("error modifying resource: %w", err)
}
if !ctrl.identityEstablished {
logger.Info("node identity established", zap.String("node_id", localIdentity.NodeID))
ctrl.identityEstablished = true
}
}
}

View File

@ -0,0 +1,166 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster_test
import (
"context"
"fmt"
"log"
"os"
"path/filepath"
"reflect"
"sync"
"testing"
"time"
"github.com/cosi-project/runtime/pkg/controller/runtime"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
"github.com/cosi-project/runtime/pkg/state/impl/namespaced"
"github.com/stretchr/testify/suite"
"github.com/talos-systems/go-retry/retry"
clusterctrl "github.com/talos-systems/talos/internal/app/machined/pkg/controllers/cluster"
v1alpha1runtime "github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/pkg/logging"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/resources/cluster"
runtimeres "github.com/talos-systems/talos/pkg/resources/runtime"
"github.com/talos-systems/talos/pkg/resources/v1alpha1"
)
type NodeIdentitySuite struct {
suite.Suite
state state.State
runtime *runtime.Runtime
wg sync.WaitGroup
ctx context.Context
ctxCancel context.CancelFunc
statePath string
}
func (suite *NodeIdentitySuite) SetupTest() {
suite.statePath = suite.T().TempDir()
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 3*time.Minute)
suite.state = state.WrapCore(namespaced.NewState(inmem.Build))
var err error
suite.runtime, err = runtime.NewRuntime(suite.state, logging.Wrap(log.Writer()))
suite.Require().NoError(err)
suite.startRuntime()
}
func (suite *NodeIdentitySuite) startRuntime() {
suite.wg.Add(1)
go func() {
defer suite.wg.Done()
suite.Assert().NoError(suite.runtime.Run(suite.ctx))
}()
}
func (suite *NodeIdentitySuite) assertNodeIdentities(expected []string) error {
resources, err := suite.state.List(suite.ctx, resource.NewMetadata(cluster.NamespaceName, cluster.IdentityType, "", resource.VersionUndefined))
if err != nil {
return err
}
ids := make([]string, 0, len(resources.Items))
for _, res := range resources.Items {
ids = append(ids, res.Metadata().ID())
}
if !reflect.DeepEqual(expected, ids) {
return retry.ExpectedError(fmt.Errorf("expected %q, got %q", expected, ids))
}
return nil
}
func (suite *NodeIdentitySuite) TestContainerMode() {
suite.Require().NoError(suite.runtime.RegisterController(&clusterctrl.NodeIdentityController{
StatePath: suite.statePath,
V1Alpha1Mode: v1alpha1runtime.ModeContainer,
}))
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
return suite.assertNodeIdentities([]string{cluster.LocalIdentity})
},
))
}
func (suite *NodeIdentitySuite) TestDefault() {
suite.Require().NoError(suite.runtime.RegisterController(&clusterctrl.NodeIdentityController{
StatePath: suite.statePath,
V1Alpha1Mode: v1alpha1runtime.ModeMetal,
}))
time.Sleep(500 * time.Millisecond)
_, err := suite.state.Get(suite.ctx, cluster.NewIdentity(cluster.NamespaceName, cluster.LocalIdentity).Metadata())
suite.Assert().True(state.IsNotFoundError(err))
stateMount := runtimeres.NewMountStatus(v1alpha1.NamespaceName, constants.StatePartitionLabel)
suite.Assert().NoError(suite.state.Create(suite.ctx, stateMount))
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
return suite.assertNodeIdentities([]string{cluster.LocalIdentity})
},
))
}
func (suite *NodeIdentitySuite) TestLoad() {
suite.Require().NoError(suite.runtime.RegisterController(&clusterctrl.NodeIdentityController{
StatePath: suite.statePath,
V1Alpha1Mode: v1alpha1runtime.ModeMetal,
}))
// using verbatim data here to make sure nodeId representation is supported in future version fo Talos
suite.Require().NoError(os.WriteFile(filepath.Join(suite.statePath, constants.NodeIdentityFilename), []byte("nodeId: gvqfS27LxD58lPlASmpaueeRVzuof16iXoieRgEvBWaE\n"), 0o600))
stateMount := runtimeres.NewMountStatus(v1alpha1.NamespaceName, constants.StatePartitionLabel)
suite.Assert().NoError(suite.state.Create(suite.ctx, stateMount))
suite.Assert().NoError(retry.Constant(3*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
return suite.assertNodeIdentities([]string{cluster.LocalIdentity})
},
))
r, err := suite.state.Get(suite.ctx, cluster.NewIdentity(cluster.NamespaceName, cluster.LocalIdentity).Metadata())
suite.Require().NoError(err)
suite.Assert().Equal("gvqfS27LxD58lPlASmpaueeRVzuof16iXoieRgEvBWaE", r.(*cluster.Identity).TypedSpec().NodeID)
}
func (suite *NodeIdentitySuite) TearDownTest() {
suite.T().Log("tear down")
suite.ctxCancel()
suite.wg.Wait()
// trigger updates in resources to stop watch loops
suite.Assert().NoError(suite.state.Create(context.Background(), runtimeres.NewMountStatus(v1alpha1.NamespaceName, "-")))
}
func TestNodeIdentitySuite(t *testing.T) {
suite.Run(t, new(NodeIdentitySuite))
}

View File

@ -15,6 +15,7 @@ import (
"go.uber.org/zap" "go.uber.org/zap"
"go.uber.org/zap/zapcore" "go.uber.org/zap/zapcore"
"github.com/talos-systems/talos/internal/app/machined/pkg/controllers/cluster"
"github.com/talos-systems/talos/internal/app/machined/pkg/controllers/config" "github.com/talos-systems/talos/internal/app/machined/pkg/controllers/config"
"github.com/talos-systems/talos/internal/app/machined/pkg/controllers/files" "github.com/talos-systems/talos/internal/app/machined/pkg/controllers/files"
"github.com/talos-systems/talos/internal/app/machined/pkg/controllers/k8s" "github.com/talos-systems/talos/internal/app/machined/pkg/controllers/k8s"
@ -74,6 +75,7 @@ func (ctrl *Controller) Run(ctx context.Context) error {
&time.SyncController{ &time.SyncController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(), V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
}, },
&cluster.NodeIdentityController{},
&config.MachineTypeController{}, &config.MachineTypeController{},
&config.K8sControlPlaneController{}, &config.K8sControlPlaneController{},
&files.EtcFileController{ &files.EtcFileController{

View File

@ -14,6 +14,7 @@ import (
"github.com/cosi-project/runtime/pkg/state/registry" "github.com/cosi-project/runtime/pkg/state/registry"
talosconfig "github.com/talos-systems/talos/pkg/machinery/config" talosconfig "github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/resources/cluster"
"github.com/talos-systems/talos/pkg/resources/config" "github.com/talos-systems/talos/pkg/resources/config"
"github.com/talos-systems/talos/pkg/resources/files" "github.com/talos-systems/talos/pkg/resources/files"
"github.com/talos-systems/talos/pkg/resources/k8s" "github.com/talos-systems/talos/pkg/resources/k8s"
@ -57,6 +58,7 @@ func NewState() (*State, error) {
description string description string
}{ }{
{v1alpha1.NamespaceName, "Talos v1alpha1 subsystems glue resources."}, {v1alpha1.NamespaceName, "Talos v1alpha1 subsystems glue resources."},
{cluster.NamespaceName, "Cluster configuration and discovery resources."},
{config.NamespaceName, "Talos node configuration."}, {config.NamespaceName, "Talos node configuration."},
{files.NamespaceName, "Files and file-like resources."}, {files.NamespaceName, "Files and file-like resources."},
{k8s.ControlPlaneNamespaceName, "Kubernetes control plane resources."}, {k8s.ControlPlaneNamespaceName, "Kubernetes control plane resources."},
@ -73,6 +75,7 @@ func NewState() (*State, error) {
// register Talos resources // register Talos resources
for _, r := range []resource.Resource{ for _, r := range []resource.Resource{
&v1alpha1.Service{}, &v1alpha1.Service{},
&cluster.Identity{},
&config.MachineConfig{}, &config.MachineConfig{},
&config.MachineType{}, &config.MachineType{},
&config.K8sControlPlane{}, &config.K8sControlPlane{},

View File

@ -458,6 +458,12 @@ const (
// DefaultClusterSecretSize is the default size in bytes for the cluster secret. // DefaultClusterSecretSize is the default size in bytes for the cluster secret.
DefaultClusterSecretSize = 32 DefaultClusterSecretSize = 32
// DefaultNodeIdentitySize is the default size in bytes for the node ID.
DefaultNodeIdentitySize = 32
// NodeIdentityFilename is the filename to cache node identity across reboots.
NodeIdentityFilename = "node-identity.yaml"
) )
// See https://linux.die.net/man/3/klogctl // See https://linux.die.net/man/3/klogctl

View File

@ -0,0 +1,10 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster
import "github.com/cosi-project/runtime/pkg/resource"
// NamespaceName contains resources related to cluster as a whole.
const NamespaceName resource.Namespace = "cluster"

View File

@ -0,0 +1,32 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster_test
import (
"context"
"testing"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
"github.com/cosi-project/runtime/pkg/state/impl/namespaced"
"github.com/cosi-project/runtime/pkg/state/registry"
"github.com/stretchr/testify/assert"
"github.com/talos-systems/talos/pkg/resources/cluster"
)
func TestRegisterResource(t *testing.T) {
ctx := context.TODO()
resources := state.WrapCore(namespaced.NewState(inmem.Build))
resourceRegistry := registry.NewResourceRegistry(resources)
for _, resource := range []resource.Resource{
&cluster.Identity{},
} {
assert.NoError(t, resourceRegistry.Register(ctx, resource))
}
}

View File

@ -0,0 +1,106 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster
import (
"crypto/rand"
"fmt"
"io"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/resource/meta"
"github.com/jxskiss/base62"
"github.com/talos-systems/talos/pkg/machinery/constants"
)
// IdentityType is type of Identity resource.
const IdentityType = resource.Type("Identities.cluster.talos.dev")
// LocalIdentity is the resource ID for the local node identity.
const LocalIdentity = resource.ID("local")
// Identity resource holds node identity (as a member of the cluster).
type Identity struct {
md resource.Metadata
spec IdentitySpec
}
// IdentitySpec describes status of rendered secrets.
//
// Note: IdentitySpec is persisted on disk in the STATE partition,
// so YAML serialization should be kept backwards compatible.
type IdentitySpec struct {
// NodeID is a random value which is persisted across reboots,
// but it gets reset on wipe.
NodeID string `yaml:"nodeId"`
}
// NewIdentity initializes a Identity resource.
func NewIdentity(namespace resource.Namespace, id resource.ID) *Identity {
r := &Identity{
md: resource.NewMetadata(namespace, IdentityType, id, resource.VersionUndefined),
spec: IdentitySpec{},
}
r.md.BumpVersion()
return r
}
// Metadata implements resource.Resource.
func (r *Identity) Metadata() *resource.Metadata {
return &r.md
}
// Spec implements resource.Resource.
func (r *Identity) Spec() interface{} {
return r.spec
}
func (r *Identity) String() string {
return fmt.Sprintf("cluster.Identity(%q)", r.md.ID())
}
// DeepCopy implements resource.Resource.
func (r *Identity) DeepCopy() resource.Resource {
return &Identity{
md: r.md,
spec: r.spec,
}
}
// ResourceDefinition implements meta.ResourceDefinitionProvider interface.
func (r *Identity) ResourceDefinition() meta.ResourceDefinitionSpec {
return meta.ResourceDefinitionSpec{
Type: IdentityType,
Aliases: []resource.Type{},
DefaultNamespace: NamespaceName,
PrintColumns: []meta.PrintColumn{
{
Name: "ID",
JSONPath: `{.nodeId}`,
},
},
}
}
// TypedSpec allows to access the Spec with the proper type.
func (r *Identity) TypedSpec() *IdentitySpec {
return &r.spec
}
// Generate new identity.
func (spec *IdentitySpec) Generate() error {
buf := make([]byte, constants.DefaultNodeIdentitySize)
if _, err := io.ReadFull(rand.Reader, buf); err != nil {
return err
}
spec.NodeID = base62.EncodeToString(buf)
return nil
}

View File

@ -0,0 +1,28 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package cluster_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/talos-systems/talos/pkg/resources/cluster"
)
func TestIdentityGenerate(t *testing.T) {
var spec1, spec2 cluster.IdentitySpec
require.NoError(t, spec1.Generate())
require.NoError(t, spec2.Generate())
assert.NotEqual(t, spec1, spec2)
length := len(spec1.NodeID)
assert.GreaterOrEqual(t, length, 43)
assert.LessOrEqual(t, length, 44)
}