Commit cb2f69ee02 (parent 68dde4028d), from https://github.com/hashicorp/vault.git
changelog/_9466.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
```release-note:bug
core (enterprise): Avoid duplicate seal rewrapping, and ensure that cluster secondaries rewrap after a seal migration.
```
@@ -10,7 +10,6 @@ import (
"io"
"os"
"path/filepath"
"strings"
"testing"
"time"
@@ -211,20 +210,13 @@ type TestLogger struct {
}

func NewTestLogger(t testing.TB) *TestLogger {
return NewTestLoggerWithSuffix(t, "")
}

func NewTestLoggerWithSuffix(t testing.TB, logFileSuffix string) *TestLogger {
var logFile *os.File
var logPath string
output := os.Stderr

logDir := os.Getenv("VAULT_TEST_LOG_DIR")
if logDir != "" {
if logFileSuffix != "" && !strings.HasPrefix(logFileSuffix, "_") {
logFileSuffix = "_" + logFileSuffix
}
logPath = filepath.Join(logDir, t.Name()+logFileSuffix+".log")
logPath = filepath.Join(logDir, t.Name()+".log")
// t.Name may include slashes.
dir, _ := filepath.Split(logPath)
err := os.MkdirAll(dir, 0o755)
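The suffix plumbing in NewTestLoggerWithSuffix lets a single test fan its output into separate log files under VAULT_TEST_LOG_DIR. A minimal usage sketch, assuming it is called from the same test-helper package; the test name, loop, and directory value are illustrative, not part of the commit:

```go
// Hypothetical caller: with VAULT_TEST_LOG_DIR=/tmp/vault-test-logs, each suffix
// should produce its own file, e.g. TestMultiNode_core0.log, TestMultiNode_core1.log.
func TestMultiNode(t *testing.T) {
	for i := 0; i < 3; i++ {
		// The helper prepends "_" to the suffix when the caller omits it.
		logger := NewTestLoggerWithSuffix(t, fmt.Sprintf("core%d", i))
		_ = logger // hand one logger to each node under test
	}
}
```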
@@ -33,7 +33,6 @@ import (
"time"

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/volume"
docker "github.com/docker/docker/client"
"github.com/hashicorp/go-cleanhttp"
@@ -79,6 +78,7 @@ type DockerCluster struct {

storage testcluster.ClusterStorage
disableMlock bool
disableTLS bool
}

func (dc *DockerCluster) NamedLogger(s string) log.Logger {
@@ -134,6 +134,9 @@ func (dc *DockerCluster) SetRecoveryKeys(keys [][]byte) {
}

func (dc *DockerCluster) GetCACertPEMFile() string {
if dc.disableTLS {
return ""
}
return testcluster.DefaultCAFile
}
@@ -449,6 +452,7 @@ func NewDockerCluster(ctx context.Context, opts *DockerClusterOptions) (*DockerC
CA: opts.CA,
storage: opts.Storage,
disableMlock: opts.DisableMlock,
disableTLS: opts.DisableTLS,
}

if err := dc.setupDockerCluster(ctx, opts); err != nil {
@@ -778,12 +782,23 @@ func (n *DockerClusterNode) Start(ctx context.Context, opts *DockerClusterOption
}
n.Logger.Trace(s)
}
// If a container gets restarted, and we reissue a ContainerLogs call using
// Follow=true, we'll see all the logs already consumed before we start seeing
// new ones. Use lastTS to avoid duplication.
lastTS := ""
logStdout := &LogConsumerWriter{logConsumer}
logStderr := &LogConsumerWriter{func(s string) {
if seenLogs.CAS(false, true) {
wg.Done()
}
testcluster.JSONLogNoTimestamp(n.Logger, s)
d := json.NewDecoder(strings.NewReader(s))
m := map[string]any{}
if err := d.Decode(&m); err != nil {
n.Logger.Error("failed to decode json output from dev vault", "error", err, "input", s)
return
}

lastTS = testcluster.JSONLogNoTimestampFromMap(n.Logger, lastTS, m)
}}

postStartFunc := func(containerID string, realIP string) error {
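The lastTS bookkeeping above is needed because re-issuing ContainerLogs with Follow=true after a container restart replays every line already consumed. The dedup reduces to a monotonic watermark over the log timestamps; here is a self-contained sketch of that idea (the entry type and replay function are illustrative, not the testcluster API):

```go
package main

import "fmt"

type entry struct{ TS, Msg string }

// replay prints only entries at or after the watermark and returns the advanced
// watermark; entries strictly older than it were consumed before the restart.
// Like the helper in the diff, ties on the watermark may be re-emitted.
func replay(watermark string, entries []entry) string {
	for _, e := range entries {
		if e.TS < watermark {
			continue // seen before the restart
		}
		fmt.Println(e.Msg)
		if e.TS > watermark {
			watermark = e.TS
		}
	}
	return watermark
}

func main() {
	logs := []entry{
		{"2024-05-01T10:00:00Z", "boot"},
		{"2024-05-01T10:00:01Z", "unsealed"},
	}
	w := replay("", logs)
	// After a restart the stream starts over, plus one genuinely new line.
	logs = append(logs, entry{"2024-05-01T10:00:05Z", "rewrap complete"})
	replay(w, logs) // skips "boot", re-emits the "unsealed" tie, prints "rewrap complete"
}
```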
@@ -920,7 +935,7 @@ func (n *DockerClusterNode) Pause(ctx context.Context) error {

func (n *DockerClusterNode) Restart(ctx context.Context) error {
timeout := 5
err := n.DockerAPI.ContainerRestart(ctx, n.Container.ID, container.StopOptions{Timeout: &timeout})
err := n.runner.RestartContainerWithTimeout(ctx, n.Container.ID, timeout)
if err != nil {
return err
}
@@ -17,6 +17,18 @@ func JSONLogNoTimestamp(outlog hclog.Logger, text string) {
outlog.Error("failed to decode json output from dev vault", "error", err, "input", text)
return
}
JSONLogNoTimestampFromMap(outlog, "", m)
}

func JSONLogNoTimestampFromMap(outlog hclog.Logger, min string, m map[string]any) string {
timeStamp := ""
if ts, ok := m["@timestamp"]; ok {
timeStamp = ts.(string)
// This works because the time format is ISO8601.
if timeStamp < min {
return min
}
}

delete(m, "@timestamp")
message := m["@message"].(string)
@@ -34,4 +46,5 @@ func JSONLogNoTimestamp(outlog hclog.Logger, text string) {
}

outlog.Log(hclog.LevelFromString(level), message, pairs...)
return timeStamp
}
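The plain string comparison against min works because the @timestamp values are ISO8601/RFC3339 strings with a fixed layout, so lexicographic order matches chronological order as long as every entry uses the same offset and precision. A standalone check of that property:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	earlier := time.Date(2024, 5, 1, 10, 0, 0, 0, time.UTC).Format(time.RFC3339)
	later := time.Date(2024, 5, 1, 10, 0, 5, 0, time.UTC).Format(time.RFC3339)
	// Same layout and same offset: string order is time order.
	fmt.Println(earlier < later) // true
}
```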
vault/core.go (121 lines changed)
@@ -323,7 +323,7 @@ type Core struct {
// migrationInfo will be populated, which on enterprise may be necessary for
// seal rewrap.
migrationInfo *migrationInformation
sealMigrationDone *uint32
sealMigrationDone atomic.Pointer[time.Time]

// barrier is the security barrier wrapping the physical backend
barrier SecurityBarrier
@@ -1058,7 +1058,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
authLock: authLock,
router: NewRouter(),
sealed: new(uint32),
sealMigrationDone: new(uint32),
sealMigrationDone: atomic.Pointer[time.Time]{},
standby: true,
standbyStopCh: new(atomic.Value),
baseLogger: conf.Logger,
@@ -1930,7 +1930,8 @@ func (c *Core) getUnsealKey(ctx context.Context, seal Seal) ([]byte, error) {
// if the preceding conditions are true but we cannot decrypt the master key
// in storage using the configured seal.
func (c *Core) sealMigrated(ctx context.Context) (bool, error) {
if atomic.LoadUint32(c.sealMigrationDone) == 1 {
sealMigDone := c.sealMigrationDone.Load()
if sealMigDone != nil && !sealMigDone.IsZero() {
return true, nil
}
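These core.go hunks replace the old *uint32 done-flag with atomic.Pointer[time.Time], so the marker also records when the migration finished and can be read without extra locking. A condensed, standalone sketch of the pattern using a toy struct, not the actual Core type:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type core struct {
	sealMigrationDone atomic.Pointer[time.Time]
}

// SetSealMigrationDone records the completion time atomically.
func (c *core) SetSealMigrationDone() {
	now := time.Now()
	c.sealMigrationDone.Store(&now)
}

// sealMigrated reports whether a migration has been flagged as done.
func (c *core) sealMigrated() bool {
	done := c.sealMigrationDone.Load()
	return done != nil && !done.IsZero()
}

func main() {
	var c core
	fmt.Println(c.sealMigrated()) // false: nothing stored yet
	c.SetSealMigrationDone()
	fmt.Println(c.sealMigrated()) // true, and the stored value says when
}
```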
@@ -2072,7 +2073,7 @@ func (c *Core) migrateSeal(ctx context.Context) error {
}

// Flag migration performed for seal-rewrap later
atomic.StoreUint32(c.sealMigrationDone, 1)
c.SetSealMigrationDone()

c.logger.Info("seal migration complete")
return nil
@@ -2569,10 +2570,11 @@ func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c
if err := c.runUnsealSetupForPrimary(ctx, logger); err != nil {
return err
}
} else if c.IsMultisealEnabled() {
sealGenInfo := c.SealAccess().GetAccess().GetSealGenerationInfo()
if sealGenInfo != nil && !sealGenInfo.IsRewrapped() {
atomic.StoreUint32(c.sealMigrationDone, 1)
}

if c.IsMultisealEnabled() {
if err := c.handleMultisealRewrapping(ctx, logger); err != nil {
return err
}
}
@@ -2584,7 +2586,6 @@ func (s standardUnsealStrategy) unseal(ctx context.Context, logger log.Logger, c
if err := c.startForwarding(ctx); err != nil {
return err
}

}

c.clusterParamsLock.Lock()
@@ -2742,59 +2743,66 @@ func runUnsealSetupFunctions(ctx context.Context, setupFunctions []func(context.
// runUnsealSetupForPrimary runs some setup code specific to clusters that are
// in the primary role (as defined by the (*Core).isPrimary method).
func (c *Core) runUnsealSetupForPrimary(ctx context.Context, logger log.Logger) error {
if err := c.setupPluginReload(); err != nil {
return c.setupPluginReload()
}

func (c *Core) handleMultisealRewrapping(ctx context.Context, logger log.Logger) error {
// Retrieve the seal generation information from storage
existingGenerationInfo, err := PhysicalSealGenInfo(ctx, c.physical)
if err != nil {
logger.Error("cannot read existing seal generation info from storage", "error", err)
return err
}

if c.IsMultisealEnabled() {
// Retrieve the seal generation information from storage
existingGenerationInfo, err := PhysicalSealGenInfo(ctx, c.physical)
if err != nil {
logger.Error("cannot read existing seal generation info from storage", "error", err)
sealGenerationInfo := c.seal.GetAccess().GetSealGenerationInfo()
shouldRewrap := !sealGenerationInfo.IsRewrapped()
var reason string
switch {
case existingGenerationInfo == nil:
reason = "First time storing seal generation information"
fallthrough
case existingGenerationInfo.Generation < sealGenerationInfo.Generation:
if reason == "" {
reason = fmt.Sprintf("Seal generation incremented from %d to %d",
existingGenerationInfo.Generation, sealGenerationInfo.Generation)
}
fallthrough
case !existingGenerationInfo.Enabled:
if reason == "" {
reason = fmt.Sprintf("Seal just become enabled again after previously being disabled")
}
if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
logger.Error("failed to store seal generation info", "error", err)
return err
}

sealGenerationInfo := c.seal.GetAccess().GetSealGenerationInfo()
shouldRewrap := !sealGenerationInfo.IsRewrapped()
switch {
case existingGenerationInfo == nil:
// This is the first time we store seal generation information
fallthrough
case existingGenerationInfo.Generation < sealGenerationInfo.Generation || !existingGenerationInfo.Enabled:
// We have incremented the seal generation or we've just become enabled again after previously being disabled,
// trust the operator in the latter case
shouldRewrap = true
case existingGenerationInfo.Generation == sealGenerationInfo.Generation:
// Same generation, update the rewrapped flag in case the previous active node
// changed its value. In other words, a rewrap may have happened, or a rewrap may have been
// started but not completed.
c.seal.GetAccess().GetSealGenerationInfo().SetRewrapped(existingGenerationInfo.IsRewrapped())
if !existingGenerationInfo.Enabled {
// Weren't enabled but are now, persist the flag
if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
logger.Error("failed to store seal generation info", "error", err)
return err
}
shouldRewrap = true
case existingGenerationInfo.Generation == sealGenerationInfo.Generation:
// Same generation, update the rewrapped flag in case the previous active node
// changed its value. In other words, a rewrap may have happened, or a rewrap may have been
// started but not completed.
c.seal.GetAccess().GetSealGenerationInfo().SetRewrapped(existingGenerationInfo.IsRewrapped())
if !existingGenerationInfo.Enabled {
// Weren't enabled but are now, persist the flag
if err := c.SetPhysicalSealGenInfo(ctx, sealGenerationInfo); err != nil {
logger.Error("failed to store seal generation info", "error", err)
return err
}
}
shouldRewrap = !existingGenerationInfo.IsRewrapped()
case existingGenerationInfo.Generation > sealGenerationInfo.Generation:
// Our seal information is out of date. The previous active node used a newer generation.
logger.Error("A newer seal generation was found in storage. The seal configuration in this node should be updated to match that of the previous active node, and this node should be restarted.")
return errors.New("newer seal generation found in storage, in memory seal configuration is out of date")
}
if shouldRewrap {
// Set the migration done flag so that a seal-rewrap gets triggered later.
// Note that in the case where multi seal is not supported, Core.migrateSeal() takes care of
// triggering the rewrap when necessary.
logger.Trace("seal generation information indicates that a seal-rewrap is needed", "generation", sealGenerationInfo.Generation)
atomic.StoreUint32(c.sealMigrationDone, 1)
}
startPartialSealRewrapping(c)
reason = "A rewrap may have happened, or a rewrap may have been started but not completed"
shouldRewrap = !existingGenerationInfo.IsRewrapped()
case existingGenerationInfo.Generation > sealGenerationInfo.Generation:
// Our seal information is out of date. The previous active node used a newer generation.
logger.Error("A newer seal generation was found in storage. The seal configuration in this node should be updated to match that of the previous active node, and this node should be restarted.")
return errors.New("newer seal generation found in storage, in memory seal configuration is out of date")
}
if shouldRewrap {
// Set the migration done flag so that a seal-rewrap gets triggered later.
// Note that in the case where multi seal is not supported, Core.migrateSeal() takes care of
// triggering the rewrap when necessary.
logger.Info("seal generation information indicates that a seal-rewrap is needed", "generation", sealGenerationInfo.Generation, "reason", reason)
c.SetSealMigrationDone()
}
startPartialSealRewrapping(c)

return nil
}
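In the rewritten switch, fallthrough lets the earliest matching condition supply the human-readable reason while the later case bodies still execute, so the persistence logic is written once; only the equal-generation and stale-generation cases leave the chain. A stripped-down, standalone sketch of just that reason-building control flow, with integer generations standing in for the SealGenerationInfo fields:

```go
package main

import "fmt"

// rewrapReason mirrors the shape of the switch above: the first matching case
// records why a rewrap is needed, then falls through so the later case bodies
// still run without overwriting it; the equal/stale generation cases stand alone.
func rewrapReason(stored, current int, storedEnabled bool) string {
	var reason string
	switch {
	case stored == 0: // stands in for "nothing stored yet" (nil in the real code)
		reason = "first time storing seal generation information"
		fallthrough
	case stored < current:
		if reason == "" {
			reason = fmt.Sprintf("seal generation incremented from %d to %d", stored, current)
		}
		fallthrough
	case !storedEnabled:
		if reason == "" {
			reason = "seal just became enabled again after previously being disabled"
		}
		// the real code persists the new generation info here and triggers a rewrap
	case stored == current:
		reason = "a rewrap may have happened, or may have been started but not completed"
	default: // stored > current
		reason = "newer seal generation found in storage; in-memory configuration is out of date"
	}
	return reason
}

func main() {
	fmt.Println(rewrapReason(0, 3, true)) // first-time case wins, later bodies still run
	fmt.Println(rewrapReason(2, 3, true)) // generation bump
	fmt.Println(rewrapReason(3, 3, true)) // same generation
}
```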
@@ -2902,10 +2910,8 @@ func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc,
close(jobs)
}

if atomic.LoadUint32(c.sealMigrationDone) == 1 {
if err := c.postSealMigration(ctx); err != nil {
c.logger.Warn("post-unseal post seal migration failed", "error", err)
}
if err := c.postSealMigration(ctx); err != nil {
c.logger.Warn("post-unseal post seal migration failed", "error", err)
}

if os.Getenv(EnvVaultDisableLocalAuthMountEntities) != "" {
@@ -4758,4 +4764,9 @@ func (c *Core) shutdownRemovedNode() {
}()
}

func (c *Core) SetSealMigrationDone() {
now := time.Now()
c.sealMigrationDone.Store(&now)
}

var errRemovedHANode = errors.New("node has been removed from the HA cluster")