mirror of
https://github.com/siderolabs/talos.git
synced 2026-05-05 12:26:21 +02:00
fix: rework the way CRI config generation is waited for
Instead of relying on the fact that CRI patch should modify the generated final CRI config, rely on the specific checksum of the CRI patch to be included into the generated CRI config. This also to resolve Talos hanging on boot when a CRI patch is a no-op (it doesn't change the generated config). Fixes #11132 Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
parent
0114183de6
commit
c7d4191e78
@ -6,6 +6,7 @@ package files
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
@ -72,13 +73,21 @@ func (ctrl *CRIConfigPartsController) Run(ctx context.Context, r controller.Runt
|
||||
|
||||
slices.Sort(parts)
|
||||
|
||||
out, err := toml.Merge(parts)
|
||||
out, checksums, err := toml.Merge(parts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := safe.WriterModify(ctx, r, files.NewEtcFileSpec(files.NamespaceName, constants.CRIConfig),
|
||||
func(r *files.EtcFileSpec) error {
|
||||
for _, key := range r.Metadata().Annotations().Raw() {
|
||||
r.Metadata().Annotations().Delete(key)
|
||||
}
|
||||
|
||||
for path, checksum := range checksums {
|
||||
r.Metadata().Annotations().Set(files.SourceFileAnnotation+":"+path, hex.EncodeToString(checksum))
|
||||
}
|
||||
|
||||
spec := r.TypedSpec()
|
||||
|
||||
spec.Contents = out
|
||||
|
||||
@ -8,6 +8,8 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@ -662,20 +664,6 @@ func injectCRIConfigPatch(ctx context.Context, st state.State, content []byte) e
|
||||
ctx, cancel := context.WithTimeout(ctx, time.Minute)
|
||||
defer cancel()
|
||||
|
||||
ch := make(chan state.Event)
|
||||
|
||||
// wait for the CRI config to be created
|
||||
if err := st.Watch(ctx, resourcefiles.NewEtcFileSpec(resourcefiles.NamespaceName, constants.CRIConfig).Metadata(), ch); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// first update should be received about the existing resource
|
||||
select {
|
||||
case <-ch:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
etcFileSpec := resourcefiles.NewEtcFileSpec(resourcefiles.NamespaceName, constants.CRICustomizationConfigPart)
|
||||
etcFileSpec.TypedSpec().Mode = 0o600
|
||||
etcFileSpec.TypedSpec().Contents = content
|
||||
@ -685,24 +673,33 @@ func injectCRIConfigPatch(ctx context.Context, st state.State, content []byte) e
|
||||
return err
|
||||
}
|
||||
|
||||
// wait for the CRI config parts controller to generate the merged file
|
||||
var version resource.Version
|
||||
checksumRaw := sha256.Sum256(content)
|
||||
expectedChecksum := hex.EncodeToString(checksumRaw[:])
|
||||
expectedAnnotation := resourcefiles.SourceFileAnnotation + ":" + filepath.Join("/etc", etcFileSpec.Metadata().ID())
|
||||
|
||||
select {
|
||||
case ev := <-ch:
|
||||
version = ev.Resource.Metadata().Version()
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
fileSpec, err := st.WatchFor(ctx, resourcefiles.NewEtcFileSpec(resourcefiles.NamespaceName, constants.CRIConfig).Metadata(),
|
||||
state.WithCondition(func(r resource.Resource) (bool, error) {
|
||||
spec, ok := r.(*resourcefiles.EtcFileSpec)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
value, ok := spec.Metadata().Annotations().Get(expectedAnnotation)
|
||||
|
||||
return ok && value == expectedChecksum, nil
|
||||
}))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error waiting for file %q to be updated: %w", constants.CRIConfig, err)
|
||||
}
|
||||
|
||||
// wait for the file to be rendered
|
||||
_, err := st.WatchFor(ctx, resourcefiles.NewEtcFileStatus(resourcefiles.NamespaceName, constants.CRIConfig).Metadata(), state.WithCondition(func(r resource.Resource) (bool, error) {
|
||||
_, err = st.WatchFor(ctx, resourcefiles.NewEtcFileStatus(resourcefiles.NamespaceName, constants.CRIConfig).Metadata(), state.WithCondition(func(r resource.Resource) (bool, error) {
|
||||
fileStatus, ok := r.(*resourcefiles.EtcFileStatus)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return fileStatus.TypedSpec().SpecVersion == version.String(), nil
|
||||
return fileStatus.TypedSpec().SpecVersion == fileSpec.Metadata().Version().String(), nil
|
||||
}))
|
||||
|
||||
return err
|
||||
|
||||
@ -18,6 +18,7 @@ import (
|
||||
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
"github.com/siderolabs/gen/ensure"
|
||||
"github.com/siderolabs/gen/xslices"
|
||||
"github.com/siderolabs/go-pointer"
|
||||
"github.com/siderolabs/go-retry/retry"
|
||||
"github.com/stretchr/testify/assert"
|
||||
@ -121,10 +122,7 @@ func (suite *ApplyConfigSuite) TestApply() {
|
||||
Mode: machineapi.ApplyConfigurationRequest_REBOOT,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
// It is expected that the connection will EOF here, so just log the error
|
||||
suite.Assert().NoErrorf(err, "failed to apply configuration (node %q)", node)
|
||||
}
|
||||
suite.Assert().NoErrorf(err, "failed to apply configuration (node %q)", node)
|
||||
|
||||
return nil
|
||||
}, assertRebootedRebootTimeout,
|
||||
@ -153,6 +151,84 @@ func (suite *ApplyConfigSuite) TestApply() {
|
||||
)
|
||||
}
|
||||
|
||||
// TestApplyNoOpCRIPatch verifies the apply config with no-op CRI patch.
|
||||
func (suite *ApplyConfigSuite) TestApplyNoOpCRIPatch() {
|
||||
if testing.Short() {
|
||||
suite.T().Skip("skipping in short mode")
|
||||
}
|
||||
|
||||
if !suite.Capabilities().SupportsReboot {
|
||||
suite.T().Skip("cluster doesn't support reboot")
|
||||
}
|
||||
|
||||
node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)
|
||||
|
||||
suite.WaitForBootDone(suite.ctx)
|
||||
|
||||
nodeCtx := client.WithNode(suite.ctx, node)
|
||||
|
||||
provider, err := suite.ReadConfigFromNode(nodeCtx)
|
||||
suite.Assert().NoErrorf(err, "failed to read existing config from node %q", node)
|
||||
|
||||
// this CRI patch is a no-op, as NRI is already disabled by default, this verifies that CRI config generation handles it correctly.
|
||||
cfgDataOut := suite.PatchV1Alpha1Config(provider, func(cfg *v1alpha1.Config) {
|
||||
cfg.MachineConfig.MachineFiles = xslices.Filter(cfg.MachineConfig.MachineFiles, func(file *v1alpha1.MachineFile) bool {
|
||||
return file.FilePath != "/etc/cri/conf.d/20-customization.part"
|
||||
})
|
||||
|
||||
cfg.MachineConfig.MachineFiles = append(cfg.MachineConfig.MachineFiles,
|
||||
&v1alpha1.MachineFile{
|
||||
FilePath: "/etc/cri/conf.d/20-customization.part",
|
||||
FileOp: "create",
|
||||
FileContent: `[plugins]
|
||||
[plugins."io.containerd.nri.v1.nri"]
|
||||
disable = true`,
|
||||
},
|
||||
)
|
||||
})
|
||||
|
||||
suite.AssertRebooted(
|
||||
suite.ctx, node, func(nodeCtx context.Context) error {
|
||||
_, err = suite.Client.ApplyConfiguration(
|
||||
nodeCtx, &machineapi.ApplyConfigurationRequest{
|
||||
Data: cfgDataOut,
|
||||
Mode: machineapi.ApplyConfigurationRequest_REBOOT,
|
||||
},
|
||||
)
|
||||
suite.Assert().NoErrorf(err, "failed to apply configuration (node %q)", node)
|
||||
|
||||
return nil
|
||||
}, assertRebootedRebootTimeout,
|
||||
suite.CleanupFailedPods,
|
||||
)
|
||||
|
||||
// revert the patch
|
||||
provider, err = suite.ReadConfigFromNode(nodeCtx)
|
||||
suite.Assert().NoErrorf(err, "failed to read existing config from node %q", node)
|
||||
|
||||
// this CRI patch is a no-op, as NRI is already disabled by default, this verifies that CRI config generation handles it correctly.
|
||||
cfgDataOut = suite.PatchV1Alpha1Config(provider, func(cfg *v1alpha1.Config) {
|
||||
cfg.MachineConfig.MachineFiles = xslices.Filter(cfg.MachineConfig.MachineFiles, func(file *v1alpha1.MachineFile) bool {
|
||||
return file.FilePath != "/etc/cri/conf.d/20-customization.part"
|
||||
})
|
||||
})
|
||||
|
||||
suite.AssertRebooted(
|
||||
suite.ctx, node, func(nodeCtx context.Context) error {
|
||||
_, err = suite.Client.ApplyConfiguration(
|
||||
nodeCtx, &machineapi.ApplyConfigurationRequest{
|
||||
Data: cfgDataOut,
|
||||
Mode: machineapi.ApplyConfigurationRequest_REBOOT,
|
||||
},
|
||||
)
|
||||
suite.Assert().NoErrorf(err, "failed to apply configuration (node %q)", node)
|
||||
|
||||
return nil
|
||||
}, assertRebootedRebootTimeout,
|
||||
suite.CleanupFailedPods,
|
||||
)
|
||||
}
|
||||
|
||||
// TestApplyWithoutReboot verifies the apply config API without reboot.
|
||||
func (suite *ApplyConfigSuite) TestApplyWithoutReboot() {
|
||||
for _, mode := range []machineapi.ApplyConfigurationRequest_Mode{
|
||||
|
||||
@ -6,7 +6,10 @@ package toml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
@ -14,37 +17,47 @@ import (
|
||||
"github.com/siderolabs/talos/pkg/machinery/config/merge"
|
||||
)
|
||||
|
||||
func tomlDecodeFile(path string, dest any) error {
|
||||
// tomlDecodeFile decodes a TOML file into the provided destination, and returns a sha256 hash of the file content.
|
||||
func tomlDecodeFile(path string, dest any) ([]byte, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer f.Close() //nolint:errcheck
|
||||
|
||||
return toml.NewDecoder(f).Decode(dest)
|
||||
hash := sha256.New()
|
||||
|
||||
err = toml.NewDecoder(io.TeeReader(f, hash)).Decode(dest)
|
||||
|
||||
return hash.Sum(nil), err
|
||||
}
|
||||
|
||||
// Merge several TOML documents in files into one.
|
||||
//
|
||||
// Merge process relies on generic map[string]any merge which might not always be correct.
|
||||
func Merge(parts []string) ([]byte, error) {
|
||||
//
|
||||
// Merge returns a sha256 checksum of each file merged.
|
||||
func Merge(parts []string) ([]byte, map[string][]byte, error) {
|
||||
merged := map[string]any{}
|
||||
checksums := make(map[string][]byte, len(parts))
|
||||
|
||||
var header []byte
|
||||
|
||||
for _, part := range parts {
|
||||
partial := map[string]any{}
|
||||
|
||||
if err := tomlDecodeFile(part, &partial); err != nil {
|
||||
return nil, fmt.Errorf("error decoding %q: %w", part, err)
|
||||
hash, err := tomlDecodeFile(part, &partial)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error decoding %q: %w", part, err)
|
||||
}
|
||||
|
||||
if err := merge.Merge(merged, partial); err != nil {
|
||||
return nil, fmt.Errorf("error merging %q: %w", part, err)
|
||||
return nil, nil, fmt.Errorf("error merging %q: %w", part, err)
|
||||
}
|
||||
|
||||
header = append(header, []byte(fmt.Sprintf("## %s\n", part))...)
|
||||
header = fmt.Appendf(header, "## %s (sha256:%s)\n", part, hex.EncodeToString(hash))
|
||||
checksums[part] = hash
|
||||
}
|
||||
|
||||
var out bytes.Buffer
|
||||
@ -53,8 +66,8 @@ func Merge(parts []string) ([]byte, error) {
|
||||
_ = out.WriteByte('\n')
|
||||
|
||||
if err := toml.NewEncoder(&out).SetIndentTables(true).Encode(merged); err != nil {
|
||||
return nil, fmt.Errorf("error encoding merged config: %w", err)
|
||||
return nil, nil, fmt.Errorf("error encoding merged config: %w", err)
|
||||
}
|
||||
|
||||
return out.Bytes(), nil
|
||||
return out.Bytes(), checksums, nil
|
||||
}
|
||||
|
||||
@ -18,12 +18,15 @@ import (
|
||||
var expected []byte
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
out, err := toml.Merge([]string{
|
||||
out, checksums, err := toml.Merge([]string{
|
||||
"testdata/1.toml",
|
||||
"testdata/2.toml",
|
||||
"testdata/3.toml",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, expected, out)
|
||||
assert.Equal(t, string(expected), string(out))
|
||||
assert.Contains(t, checksums, "testdata/1.toml")
|
||||
assert.Contains(t, checksums, "testdata/2.toml")
|
||||
assert.Contains(t, checksums, "testdata/3.toml")
|
||||
}
|
||||
|
||||
6
internal/pkg/toml/testdata/expected.toml
vendored
6
internal/pkg/toml/testdata/expected.toml
vendored
@ -1,6 +1,6 @@
|
||||
## testdata/1.toml
|
||||
## testdata/2.toml
|
||||
## testdata/3.toml
|
||||
## testdata/1.toml (sha256:2eac71621235f8666c54ad3f29a77b2e8483bbd7f0717f8613af591fb5609b44)
|
||||
## testdata/2.toml (sha256:47ae85a638a291b04518413a12b19c51883a17c8f5064193462d3527b4495e36)
|
||||
## testdata/3.toml (sha256:159608dffd674e5fe351d47166eab59ee93f6523ff336602364edfd7be25c796)
|
||||
|
||||
version = 2
|
||||
|
||||
|
||||
@ -9,3 +9,6 @@ import "github.com/cosi-project/runtime/pkg/resource"
|
||||
|
||||
// NamespaceName contains file resources.
|
||||
const NamespaceName resource.Namespace = "files"
|
||||
|
||||
// SourceFileAnnotation is used to annotate a file resource with the source file path(s).
|
||||
const SourceFileAnnotation = "talos.dev/source-file"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user