diff --git a/changelog/_9466.txt b/changelog/_9466.txt
new file mode 100644
index 0000000000..1ecd0cfdec
--- /dev/null
+++ b/changelog/_9466.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+core (enterprise): Avoid duplicate seal rewrapping, and ensure that cluster secondaries rewrap after a seal migration.
+```
diff --git a/helper/testhelpers/corehelpers/corehelpers.go b/helper/testhelpers/corehelpers/corehelpers.go
index 1c7da787d7..9afc59c696 100644
--- a/helper/testhelpers/corehelpers/corehelpers.go
+++ b/helper/testhelpers/corehelpers/corehelpers.go
@@ -10,7 +10,6 @@ import (
 	"io"
 	"os"
 	"path/filepath"
-	"strings"
 	"testing"
 	"time"
 
@@ -211,20 +210,13 @@ type TestLogger struct {
 }
 
 func NewTestLogger(t testing.TB) *TestLogger {
-	return NewTestLoggerWithSuffix(t, "")
-}
-
-func NewTestLoggerWithSuffix(t testing.TB, logFileSuffix string) *TestLogger {
 	var logFile *os.File
 	var logPath string
 	output := os.Stderr
 
 	logDir := os.Getenv("VAULT_TEST_LOG_DIR")
 	if logDir != "" {
-		if logFileSuffix != "" && !strings.HasPrefix(logFileSuffix, "_") {
-			logFileSuffix = "_" + logFileSuffix
-		}
-		logPath = filepath.Join(logDir, t.Name()+logFileSuffix+".log")
+		logPath = filepath.Join(logDir, t.Name()+".log")
 
 		// t.Name may include slashes.
 		dir, _ := filepath.Split(logPath)
 		err := os.MkdirAll(dir, 0o755)
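The corehelpers hunk leans on the comment above `MkdirAll`: `t.Name()` for a subtest contains slashes (for example "TestCluster/node-0"), so the log path has to be treated as a nested directory before the file can be created. A minimal sketch of that idea outside the diff; `openTestLog` is a hypothetical helper, not part of the change:

```go
package sketch

import (
	"fmt"
	"os"
	"path/filepath"
	"testing"
)

// openTestLog illustrates the same pattern: subtest names such as
// "TestCluster/node-0" turn into nested paths, so the parent
// directories must exist before the log file is created.
func openTestLog(t testing.TB, logDir string) (*os.File, error) {
	logPath := filepath.Join(logDir, t.Name()+".log") // t.Name may include slashes
	dir, _ := filepath.Split(logPath)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return nil, fmt.Errorf("creating log dir %q: %w", dir, err)
	}
	return os.Create(logPath)
}
```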
diff --git a/sdk/helper/testcluster/docker/environment.go b/sdk/helper/testcluster/docker/environment.go
index 15248c723e..21e6e16cc2 100644
--- a/sdk/helper/testcluster/docker/environment.go
+++ b/sdk/helper/testcluster/docker/environment.go
@@ -33,7 +33,6 @@ import (
 	"time"
 
 	"github.com/docker/docker/api/types"
-	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/volume"
 	docker "github.com/docker/docker/client"
 	"github.com/hashicorp/go-cleanhttp"
@@ -79,6 +78,7 @@ type DockerCluster struct {
 	storage      testcluster.ClusterStorage
 	disableMlock bool
+	disableTLS   bool
 }
 
 func (dc *DockerCluster) NamedLogger(s string) log.Logger {
@@ -134,6 +134,9 @@ func (dc *DockerCluster) SetRecoveryKeys(keys [][]byte) {
 }
 
 func (dc *DockerCluster) GetCACertPEMFile() string {
+	if dc.disableTLS {
+		return ""
+	}
 	return testcluster.DefaultCAFile
 }
 
@@ -449,6 +452,7 @@ func NewDockerCluster(ctx context.Context, opts *DockerClusterOptions) (*DockerC
 		CA:           opts.CA,
 		storage:      opts.Storage,
 		disableMlock: opts.DisableMlock,
+		disableTLS:   opts.DisableTLS,
 	}
 
 	if err := dc.setupDockerCluster(ctx, opts); err != nil {
@@ -778,12 +782,23 @@ func (n *DockerClusterNode) Start(ctx context.Context, opts *DockerClusterOption
 		}
 		n.Logger.Trace(s)
 	}
+	// If a container gets restarted and we reissue a ContainerLogs call using
+	// Follow=true, we'll see all the logs already consumed before we start seeing
+	// new ones. Use lastTS to avoid duplication.
+	lastTS := ""
 	logStdout := &LogConsumerWriter{logConsumer}
 	logStderr := &LogConsumerWriter{func(s string) {
 		if seenLogs.CAS(false, true) {
 			wg.Done()
 		}
-		testcluster.JSONLogNoTimestamp(n.Logger, s)
+		d := json.NewDecoder(strings.NewReader(s))
+		m := map[string]any{}
+		if err := d.Decode(&m); err != nil {
+			n.Logger.Error("failed to decode json output from dev vault", "error", err, "input", s)
+			return
+		}
+
+		lastTS = testcluster.JSONLogNoTimestampFromMap(n.Logger, lastTS, m)
 	}}
 
 	postStartFunc := func(containerID string, realIP string) error {
@@ -920,7 +935,7 @@ func (n *DockerClusterNode) Pause(ctx context.Context) error {
 
 func (n *DockerClusterNode) Restart(ctx context.Context) error {
 	timeout := 5
-	err := n.DockerAPI.ContainerRestart(ctx, n.Container.ID, container.StopOptions{Timeout: &timeout})
+	err := n.runner.RestartContainerWithTimeout(ctx, n.Container.ID, timeout)
 	if err != nil {
 		return err
 	}
diff --git a/sdk/helper/testcluster/logging.go b/sdk/helper/testcluster/logging.go
index dda759c7f8..39873acd69 100644
--- a/sdk/helper/testcluster/logging.go
+++ b/sdk/helper/testcluster/logging.go
@@ -17,6 +17,18 @@ func JSONLogNoTimestamp(outlog hclog.Logger, text string) {
 		outlog.Error("failed to decode json output from dev vault", "error", err, "input", text)
 		return
 	}
+	JSONLogNoTimestampFromMap(outlog, "", m)
+}
+
+func JSONLogNoTimestampFromMap(outlog hclog.Logger, min string, m map[string]any) string {
+	timeStamp := ""
+	if ts, ok := m["@timestamp"]; ok {
+		timeStamp = ts.(string)
+		// This works because the time format is ISO8601, so lexicographic order matches chronological order.
+		if timeStamp < min {
+			return min
+		}
+	}
 
 	delete(m, "@timestamp")
 	message := m["@message"].(string)
@@ -34,4 +46,5 @@ func JSONLogNoTimestamp(outlog hclog.Logger, text string) {
 	}
 
 	outlog.Log(hclog.LevelFromString(level), message, pairs...)
+	return timeStamp
 }
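The new `JSONLogNoTimestampFromMap` dedups replayed container logs by comparing raw timestamp strings, which only works because hclog emits ISO8601 timestamps whose lexicographic order matches chronological order. A standalone demonstration of that property; the sample timestamps are made up:

```go
package main

import "fmt"

func main() {
	// ISO8601/RFC3339 timestamps are fixed-width with the most significant
	// fields first, so plain string comparison orders them chronologically.
	older := "2025-01-02T10:04:05.000Z"
	newer := "2025-01-02T10:04:06.000Z"
	fmt.Println(older < newer) // true: string order agrees with time order
}
```

This is why the consumer can carry a single `lastTS` string instead of parsing every entry into a `time.Time`.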
diff --git a/vault/core.go b/vault/core.go
index 0b511397c2..e6a1fc1f6d 100644
--- a/vault/core.go
+++ b/vault/core.go
@@ -323,7 +323,7 @@ type Core struct {
 	// migrationInfo will be populated, which on enterprise may be necessary for
 	// seal rewrap.
 	migrationInfo     *migrationInformation
-	sealMigrationDone *uint32
+	sealMigrationDone atomic.Pointer[time.Time]
 
 	// barrier is the security barrier wrapping the physical backend
 	barrier SecurityBarrier
@@ -1058,7 +1058,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
 		authLock:          authLock,
 		router:            NewRouter(),
 		sealed:            new(uint32),
-		sealMigrationDone: new(uint32),
+		sealMigrationDone: atomic.Pointer[time.Time]{},
 		standby:           true,
 		standbyStopCh:     new(atomic.Value),
 		baseLogger:        conf.Logger,
@@ -1930,7 +1930,8 @@ func (c *Core) getUnsealKey(ctx context.Context, seal Seal) ([]byte, error) {
 // if the preceding conditions are true but we cannot decrypt the master key
 // in storage using the configured seal.
 func (c *Core) sealMigrated(ctx context.Context) (bool, error) {
-	if atomic.LoadUint32(c.sealMigrationDone) == 1 {
+	sealMigDone := c.sealMigrationDone.Load()
+	if sealMigDone != nil && !sealMigDone.IsZero() {
 		return true, nil
 	}
@@ -2072,7 +2073,7 @@ func (c *Core) migrateSeal(ctx context.Context) error {
 	}
 
 	// Flag migration performed for seal-rewrap later
-	atomic.StoreUint32(c.sealMigrationDone, 1)
+	c.SetSealMigrationDone()
 
 	c.logger.Info("seal migration complete")
 	return nil
@@ -2569,10 +2570,11 @@ func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c
 		if err := c.runUnsealSetupForPrimary(ctx, logger); err != nil {
 			return err
 		}
-	} else if c.IsMultisealEnabled() {
-		sealGenInfo := c.SealAccess().GetAccess().GetSealGenerationInfo()
-		if sealGenInfo != nil && !sealGenInfo.IsRewrapped() {
-			atomic.StoreUint32(c.sealMigrationDone, 1)
+	}
+
+	if c.IsMultisealEnabled() {
+		if err := c.handleMultisealRewrapping(ctx, logger); err != nil {
+			return err
 		}
 	}
@@ -2584,7 +2586,6 @@ func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c
 		if err := c.startForwarding(ctx); err != nil {
 			return err
 		}
-	}
 
 	c.clusterParamsLock.Lock()
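Swapping the `*uint32` flag for `atomic.Pointer[time.Time]` keeps the lock-free check but also records when the migration finished; a nil (or zero) pointer still reads as "not done". A self-contained sketch of the pattern, where `doneFlag` is illustrative rather than the actual `Core` field:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// doneFlag mirrors the pattern: Store a timestamp to set the flag,
// Load and nil-check to test it. All operations are lock-free.
type doneFlag struct {
	done atomic.Pointer[time.Time]
}

func (f *doneFlag) set() {
	now := time.Now()
	f.done.Store(&now)
}

func (f *doneFlag) isSet() bool {
	ts := f.done.Load()
	return ts != nil && !ts.IsZero()
}

func main() {
	var f doneFlag
	fmt.Println(f.isSet()) // false: nothing stored yet
	f.set()
	fmt.Println(f.isSet()) // true, and f.done.Load() says when
}
```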
@@ -2742,59 +2743,66 @@ func runUnsealSetupFunctions(ctx context.Context, setupFunctions []func(context.
 
 // runUnsealSetupForPrimary runs some setup code specific to clusters that are
 // in the primary role (as defined by the (*Core).isPrimary method).
 func (c *Core) runUnsealSetupForPrimary(ctx context.Context, logger log.Logger) error {
-	if err := c.setupPluginReload(); err != nil {
-		return err
-	}
-
-	if c.IsMultisealEnabled() {
-		// Retrieve the seal generation information from storage
-		existingGenerationInfo, err := PhysicalSealGenInfo(ctx, c.physical)
-		if err != nil {
-			logger.Error("cannot read existing seal generation info from storage", "error", err)
-			return err
-		}
-
-		sealGenerationInfo := c.seal.GetAccess().GetSealGenerationInfo()
-		shouldRewrap := !sealGenerationInfo.IsRewrapped()
-		switch {
-		case existingGenerationInfo == nil:
-			// This is the first time we store seal generation information
-			fallthrough
-		case existingGenerationInfo.Generation < sealGenerationInfo.Generation || !existingGenerationInfo.Enabled:
-			// We have incremented the seal generation or we've just become enabled again after previously being disabled,
-			// trust the operator in the latter case
-			if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
-				logger.Error("failed to store seal generation info", "error", err)
-				return err
-			}
-			shouldRewrap = true
-		case existingGenerationInfo.Generation == sealGenerationInfo.Generation:
-			// Same generation, update the rewrapped flag in case the previous active node
-			// changed its value. In other words, a rewrap may have happened, or a rewrap may have been
-			// started but not completed.
-			c.seal.GetAccess().GetSealGenerationInfo().SetRewrapped(existingGenerationInfo.IsRewrapped())
-			if !existingGenerationInfo.Enabled {
-				// Weren't enabled but are now, persist the flag
-				if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
-					logger.Error("failed to store seal generation info", "error", err)
-					return err
-				}
-			}
-			shouldRewrap = !existingGenerationInfo.IsRewrapped()
-		case existingGenerationInfo.Generation > sealGenerationInfo.Generation:
-			// Our seal information is out of date. The previous active node used a newer generation.
-			logger.Error("A newer seal generation was found in storage. The seal configuration in this node should be updated to match that of the previous active node, and this node should be restarted.")
-			return errors.New("newer seal generation found in storage, in memory seal configuration is out of date")
-		}
-		if shouldRewrap {
-			// Set the migration done flag so that a seal-rewrap gets triggered later.
-			// Note that in the case where multi seal is not supported, Core.migrateSeal() takes care of
-			// triggering the rewrap when necessary.
-			logger.Trace("seal generation information indicates that a seal-rewrap is needed", "generation", sealGenerationInfo.Generation)
-			atomic.StoreUint32(c.sealMigrationDone, 1)
-		}
-		startPartialSealRewrapping(c)
-	}
+	return c.setupPluginReload()
+}
+
+func (c *Core) handleMultisealRewrapping(ctx context.Context, logger log.Logger) error {
+	// Retrieve the seal generation information from storage
+	existingGenerationInfo, err := PhysicalSealGenInfo(ctx, c.physical)
+	if err != nil {
+		logger.Error("cannot read existing seal generation info from storage", "error", err)
+		return err
+	}
+
+	sealGenerationInfo := c.seal.GetAccess().GetSealGenerationInfo()
+	shouldRewrap := !sealGenerationInfo.IsRewrapped()
+	var reason string
+	switch {
+	case existingGenerationInfo == nil:
+		reason = "First time storing seal generation information"
+		fallthrough
+	case existingGenerationInfo.Generation < sealGenerationInfo.Generation:
+		if reason == "" {
+			reason = fmt.Sprintf("Seal generation incremented from %d to %d",
+				existingGenerationInfo.Generation, sealGenerationInfo.Generation)
+		}
+		fallthrough
+	case !existingGenerationInfo.Enabled:
+		if reason == "" {
+			reason = "Seal just became enabled again after previously being disabled"
+		}
+		if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
+			logger.Error("failed to store seal generation info", "error", err)
+			return err
+		}
+		shouldRewrap = true
+	case existingGenerationInfo.Generation == sealGenerationInfo.Generation:
+		// Same generation, update the rewrapped flag in case the previous active node
+		// changed its value. In other words, a rewrap may have happened, or a rewrap may have been
+		// started but not completed.
+		c.seal.GetAccess().GetSealGenerationInfo().SetRewrapped(existingGenerationInfo.IsRewrapped())
+		if !existingGenerationInfo.Enabled {
+			// Weren't enabled but are now, persist the flag
+			if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
+				logger.Error("failed to store seal generation info", "error", err)
+				return err
+			}
+		}
+		reason = "A rewrap may have happened, or a rewrap may have been started but not completed"
+		shouldRewrap = !existingGenerationInfo.IsRewrapped()
+	case existingGenerationInfo.Generation > sealGenerationInfo.Generation:
+		// Our seal information is out of date. The previous active node used a newer generation.
+		logger.Error("A newer seal generation was found in storage. The seal configuration in this node should be updated to match that of the previous active node, and this node should be restarted.")
+		return errors.New("newer seal generation found in storage, in memory seal configuration is out of date")
+	}
+	if shouldRewrap {
+		// Set the migration done flag so that a seal-rewrap gets triggered later.
+		// Note that in the case where multi seal is not supported, Core.migrateSeal() takes care of
+		// triggering the rewrap when necessary.
+		logger.Info("seal generation information indicates that a seal-rewrap is needed", "generation", sealGenerationInfo.Generation, "reason", reason)
+		c.SetSealMigrationDone()
+	}
+	startPartialSealRewrapping(c)
 	return nil
 }
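The rewritten switch relies on Go's `fallthrough` semantics: `fallthrough` jumps into the next case's body without evaluating its condition, so the first three conditions share the persist-and-rewrap branch, while the `reason == ""` guards preserve the first, most specific reason. A reduced sketch of that control flow; `classify` and its parameters are hypothetical stand-ins for the seal generation fields:

```go
package main

import "fmt"

// classify is a hypothetical reduction of the switch above. Case
// conditions are evaluated top-down and fallthrough enters the next
// case body unconditionally, so a nil existing pointer is caught by
// the first case before *existing is ever dereferenced.
func classify(existing *uint64, current uint64, enabled bool) (reason string, rewrap bool) {
	switch {
	case existing == nil:
		reason = "first time storing generation info"
		fallthrough
	case *existing < current:
		if reason == "" {
			reason = fmt.Sprintf("generation incremented to %d", current)
		}
		fallthrough
	case !enabled:
		if reason == "" {
			reason = "seal re-enabled after being disabled"
		}
		return reason, true // persist the new info and rewrap
	default:
		return "generation unchanged", false
	}
}

func main() {
	gen := uint64(3)
	fmt.Println(classify(nil, 4, true))   // first time storing generation info, true
	fmt.Println(classify(&gen, 4, true))  // generation incremented to 4, true
	fmt.Println(classify(&gen, 3, false)) // seal re-enabled after being disabled, true
	fmt.Println(classify(&gen, 3, true))  // generation unchanged, false
}
```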
@@ -2902,10 +2910,8 @@ func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc,
 		close(jobs)
 	}
 
-	if atomic.LoadUint32(c.sealMigrationDone) == 1 {
-		if err := c.postSealMigration(ctx); err != nil {
-			c.logger.Warn("post-unseal post seal migration failed", "error", err)
-		}
+	if err := c.postSealMigration(ctx); err != nil {
+		c.logger.Warn("post-unseal post seal migration failed", "error", err)
 	}
 
 	if os.Getenv(EnvVaultDisableLocalAuthMountEntities) != "" {
@@ -4758,4 +4764,9 @@ func (c *Core) shutdownRemovedNode() {
 	}()
 }
 
+func (c *Core) SetSealMigrationDone() {
+	now := time.Now()
+	c.sealMigrationDone.Store(&now)
+}
+
 var errRemovedHANode = errors.New("node has been removed from the HA cluster")
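Tying the logging changes earlier in the diff together, `lastTS` acts as a high-water mark: after a container restart the stream is replayed from the beginning, and entries whose timestamps sort strictly below the mark are dropped. A compact model of that loop; `replay` and its inputs are illustrative, not code from the change:

```go
package main

import "fmt"

// replay models the consumer after a container restart: entries below
// the high-water mark were already logged before the restart and are
// skipped; everything else is emitted and advances the mark. Entries
// equal to the mark are re-emitted, matching the `<` comparison in the
// diff.
func replay(entries []map[string]any, lastTS string) string {
	for _, m := range entries {
		ts, _ := m["@timestamp"].(string)
		if ts < lastTS {
			continue // duplicate from before the restart
		}
		lastTS = ts
		fmt.Println(m["@message"])
	}
	return lastTS
}

func main() {
	entries := []map[string]any{
		{"@timestamp": "2025-01-02T10:00:01Z", "@message": "seen before restart"},
		{"@timestamp": "2025-01-02T10:00:02Z", "@message": "new after restart"},
	}
	// Mark carried over from before the restart; the first entry is skipped.
	replay(entries, "2025-01-02T10:00:02Z")
}
```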