mirror of
https://github.com/hashicorp/vault.git
synced 2025-09-20 21:31:09 +02:00
VAULT-31409: trace postUnseal function (#28895)
* initial implementation of unseal trace * close file if we fail to start the trace didn't bother to check the error from traceFile.Close() * use reloadable config instead of env var * license * remove leftover * allow setting custom dir and remove new package * bring back StartDebugTrace after talking to Kuba it sounds like it's a good idea to try to move stuff out of core, so even if there's no immediate need for a generic debug trace function it's still fair to add it * track postUnseal instead of unsealInternal also some usability improvements from manual testing * address PR comments * address security review there were concerns about using the /tmp directory because of permissions, or having a default dir at all, so now it's required to set a dir in order to generate the traces. * add unit tests to StartDebugTrace * move back to default dir * document new parameters * add tiny integration test * avoid column in trace filename sounds like it might be forbidden in Windows and possibly cause problems in some MacOS applications. * address PR feedback * add go doc to test CI was complaining about missing comments on the new test function. It feels a bit silly to require this of tests but whatever XD * fix tests
This commit is contained in:
parent
42552f6303
commit
a2c467cc22
3
changelog/28895.txt
Normal file
3
changelog/28895.txt
Normal file
@ -0,0 +1,3 @@
|
||||
```release-note:improvement
|
||||
core: Added new `enable_post_unseal_trace` and `post_unseal_trace_directory` config options to generate Go traces during the post-unseal step for debug purposes.
|
||||
```
|
@ -115,6 +115,9 @@ type Config struct {
|
||||
License string `hcl:"-"`
|
||||
LicensePath string `hcl:"license_path"`
|
||||
DisableSSCTokens bool `hcl:"-"`
|
||||
|
||||
EnablePostUnsealTrace bool `hcl:"enable_post_unseal_trace"`
|
||||
PostUnsealTraceDir string `hcl:"post_unseal_trace_directory"`
|
||||
}
|
||||
|
||||
const (
|
||||
@ -425,6 +428,16 @@ func (c *Config) Merge(c2 *Config) *Config {
|
||||
result.LicensePath = c2.LicensePath
|
||||
}
|
||||
|
||||
result.EnablePostUnsealTrace = c.EnablePostUnsealTrace
|
||||
if c2.EnablePostUnsealTrace {
|
||||
result.EnablePostUnsealTrace = c2.EnablePostUnsealTrace
|
||||
}
|
||||
|
||||
result.PostUnsealTraceDir = c.PostUnsealTraceDir
|
||||
if c2.PostUnsealTraceDir != "" {
|
||||
result.PostUnsealTraceDir = c2.PostUnsealTraceDir
|
||||
}
|
||||
|
||||
// Use values from top-level configuration for storage if set
|
||||
if storage := result.Storage; storage != nil {
|
||||
if result.APIAddr != "" {
|
||||
@ -1150,6 +1163,9 @@ func (c *Config) Sanitized() map[string]interface{} {
|
||||
"detect_deadlocks": c.DetectDeadlocks,
|
||||
|
||||
"imprecise_lease_role_tracking": c.ImpreciseLeaseRoleTracking,
|
||||
|
||||
"enable_post_unseal_trace": c.EnablePostUnsealTrace,
|
||||
"post_unseal_trace_directory": c.PostUnsealTraceDir,
|
||||
}
|
||||
for k, v := range sharedResult {
|
||||
result[k] = v
|
||||
|
@ -854,6 +854,8 @@ func testConfig_Sanitized(t *testing.T) {
|
||||
},
|
||||
"administrative_namespace_path": "admin/",
|
||||
"imprecise_lease_role_tracking": false,
|
||||
"enable_post_unseal_trace": true,
|
||||
"post_unseal_trace_directory": "/tmp",
|
||||
}
|
||||
|
||||
addExpectedEntSanitizedConfig(expected, []string{"http"})
|
||||
|
@ -58,3 +58,5 @@ raw_storage_endpoint = true
|
||||
disable_sealwrap = true
|
||||
disable_sentinel_trace = true
|
||||
administrative_namespace_path = "admin/"
|
||||
enable_post_unseal_trace = true
|
||||
post_unseal_trace_directory = "/tmp"
|
||||
|
63
helper/trace/debug_trace.go
Normal file
63
helper/trace/debug_trace.go
Normal file
@ -0,0 +1,63 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
package trace
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/trace"
|
||||
"time"
|
||||
)
|
||||
|
||||
func StartDebugTrace(dir string, filePrefix string) (file string, stop func() error, err error) {
|
||||
dirMustExist := true
|
||||
if dir == "" {
|
||||
dirMustExist = false // if a dir is provided it must exist, otherwise we'll create a default one
|
||||
dir = filepath.Join(os.TempDir(), "vault-traces")
|
||||
}
|
||||
|
||||
d, err := os.Stat(dir)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return "", nil, fmt.Errorf("failed to stat trace directory %q: %s", dir, err)
|
||||
}
|
||||
|
||||
if os.IsNotExist(err) && dirMustExist {
|
||||
return "", nil, fmt.Errorf("trace directory %q does not exist", dir)
|
||||
}
|
||||
|
||||
if !os.IsNotExist(err) && !d.IsDir() {
|
||||
return "", nil, fmt.Errorf("trace directory %q is not a directory", dir)
|
||||
}
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
if err := os.Mkdir(dir, 0o700); err != nil {
|
||||
return "", nil, fmt.Errorf("failed to create trace directory %q: %s", dir, err)
|
||||
}
|
||||
}
|
||||
|
||||
// would prefer a more human readable time reference in the file name but the column
|
||||
// character can cause problems in filenames
|
||||
fileName := fmt.Sprintf("%s-%d.trace", filePrefix, time.Now().Unix())
|
||||
traceFile, err := filepath.Abs(filepath.Join(dir, fileName))
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("failed to get absolute path for trace file: %s", err)
|
||||
}
|
||||
f, err := os.Create(traceFile)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("failed to create trace file %q: %s", traceFile, err)
|
||||
}
|
||||
|
||||
if err := trace.Start(f); err != nil {
|
||||
f.Close()
|
||||
return "", nil, fmt.Errorf("failed to start trace: %s", err)
|
||||
}
|
||||
|
||||
stop = func() error {
|
||||
trace.Stop()
|
||||
return f.Close()
|
||||
}
|
||||
|
||||
return f.Name(), stop, nil
|
||||
}
|
111
helper/trace/debug_trace_test.go
Normal file
111
helper/trace/debug_trace_test.go
Normal file
@ -0,0 +1,111 @@
|
||||
// Copyright (c) HashiCorp, Inc.
|
||||
// SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
package trace
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestStartDebugTrace tests the debug trace functionality creating real
|
||||
// files and traces.
|
||||
func TestStartDebugTrace(t *testing.T) {
|
||||
t.Run("error_on_non_existent_dir", func(t *testing.T) {
|
||||
_, _, err := StartDebugTrace("non-existent-dir", "filePrefix")
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "does not exist")
|
||||
})
|
||||
|
||||
t.Run("error_on_non_dir", func(t *testing.T) {
|
||||
f, err := os.CreateTemp("", "")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, f.Close())
|
||||
_, _, err = StartDebugTrace(f.Name(), "")
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "is not a directory")
|
||||
})
|
||||
|
||||
t.Run("error_on_failed_to_create_trace_file", func(t *testing.T) {
|
||||
noWriteFolder := filepath.Join(os.TempDir(), "no-write-permissions")
|
||||
// create folder without write permission
|
||||
err := os.Mkdir(noWriteFolder, 0o000)
|
||||
t.Cleanup(func() {
|
||||
os.RemoveAll(noWriteFolder)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
_, _, err = StartDebugTrace(noWriteFolder, "")
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "failed to create trace file")
|
||||
})
|
||||
|
||||
t.Run("error_trying_to_start_second_concurrent_trace", func(t *testing.T) {
|
||||
dir, err := os.MkdirTemp("", "")
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
os.RemoveAll(dir)
|
||||
})
|
||||
_, stop, err := StartDebugTrace(dir, "filePrefix")
|
||||
require.NoError(t, err)
|
||||
_, stopNil, err := StartDebugTrace(dir, "filePrefix")
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "failed to start trace")
|
||||
require.NoError(t, stop())
|
||||
require.Nil(t, stopNil)
|
||||
})
|
||||
|
||||
t.Run("error_when_stating_tmp_dir_with_restricted_permissions", func(t *testing.T) {
|
||||
// this test relies on setting TMPDIR so skip it if we're not on a Unix system
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("skipping test on Windows")
|
||||
}
|
||||
|
||||
tmpMissingPermissions := filepath.Join(t.TempDir(), "missing_permissions")
|
||||
err := os.Mkdir(tmpMissingPermissions, 0o000)
|
||||
require.NoError(t, err)
|
||||
t.Setenv("TMPDIR", tmpMissingPermissions)
|
||||
_, _, err = StartDebugTrace("", "filePrefix")
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "failed to stat trace directory")
|
||||
})
|
||||
|
||||
t.Run("successful_trace_generates_non_empty_file", func(t *testing.T) {
|
||||
dir, err := os.MkdirTemp("", "")
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
os.RemoveAll(dir)
|
||||
})
|
||||
file, stop, err := StartDebugTrace(dir, "filePrefix")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, stop())
|
||||
f, err := os.Stat(file)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, f.Size(), int64(0))
|
||||
})
|
||||
|
||||
t.Run("successful_creation_of_tmp_dir", func(t *testing.T) {
|
||||
os.RemoveAll(filepath.Join(os.TempDir(), "vault-traces"))
|
||||
file, stop, err := StartDebugTrace("", "filePrefix")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, stop())
|
||||
require.Contains(t, file, filepath.Join(os.TempDir(), "vault-traces", "filePrefix"))
|
||||
f, err := os.Stat(file)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, f.Size(), int64(0))
|
||||
})
|
||||
|
||||
t.Run("successful_trace_with_existing_tmp_dir", func(t *testing.T) {
|
||||
os.Mkdir(filepath.Join(os.TempDir(), "vault-traces"), 0o700)
|
||||
file, stop, err := StartDebugTrace("", "filePrefix")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, stop())
|
||||
require.Contains(t, file, filepath.Join(os.TempDir(), "vault-traces", "filePrefix"))
|
||||
f, err := os.Stat(file)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, f.Size(), int64(0))
|
||||
})
|
||||
}
|
@ -177,6 +177,8 @@ func TestSysConfigState_Sanitized(t *testing.T) {
|
||||
"storage": tc.expectedStorageOutput,
|
||||
"administrative_namespace_path": "",
|
||||
"imprecise_lease_role_tracking": false,
|
||||
"enable_post_unseal_trace": false,
|
||||
"post_unseal_trace_directory": "",
|
||||
}
|
||||
|
||||
if tc.expectedHAStorageOutput != nil {
|
||||
|
@ -50,6 +50,7 @@ import (
|
||||
"github.com/hashicorp/vault/helper/metricsutil"
|
||||
"github.com/hashicorp/vault/helper/namespace"
|
||||
"github.com/hashicorp/vault/helper/osutil"
|
||||
"github.com/hashicorp/vault/helper/trace"
|
||||
"github.com/hashicorp/vault/physical/raft"
|
||||
"github.com/hashicorp/vault/sdk/helper/certutil"
|
||||
"github.com/hashicorp/vault/sdk/helper/consts"
|
||||
@ -2704,6 +2705,10 @@ func (c *Core) runUnsealSetupForPrimary(ctx context.Context, logger log.Logger)
|
||||
// requires the Vault to be unsealed such as mount tables, logical backends,
|
||||
// credential stores, etc.
|
||||
func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc, unsealer UnsealStrategy) (retErr error) {
|
||||
if stopTrace := c.tracePostUnsealIfEnabled(); stopTrace != nil {
|
||||
defer stopTrace()
|
||||
}
|
||||
|
||||
defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now())
|
||||
|
||||
// Clear any out
|
||||
@ -2819,6 +2824,41 @@ func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc,
|
||||
return nil
|
||||
}
|
||||
|
||||
// tracePostUnsealIfEnabled checks if post-unseal tracing is enabled in the server
|
||||
// config and starts a go trace if it is, returning a stop function to be called once
|
||||
// the post-unseal process is complete.
|
||||
func (c *Core) tracePostUnsealIfEnabled() (stop func()) {
|
||||
// use rawConfig to allow config hot-reload of EnablePostUnsealTrace via SIGHUP
|
||||
conf := c.rawConfig.Load()
|
||||
if conf == nil {
|
||||
c.logger.Warn("failed to get raw config to check enable_post_unseal_trace")
|
||||
return nil
|
||||
}
|
||||
|
||||
if !conf.(*server.Config).EnablePostUnsealTrace {
|
||||
return nil
|
||||
}
|
||||
|
||||
dir := conf.(*server.Config).PostUnsealTraceDir
|
||||
|
||||
traceFile, stopTrace, err := trace.StartDebugTrace(dir, "post-unseal")
|
||||
if err != nil {
|
||||
c.logger.Warn("failed to start post-unseal trace", "error", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
c.logger.Info("post-unseal trace started", "file", traceFile)
|
||||
|
||||
return func() {
|
||||
err := stopTrace()
|
||||
if err != nil {
|
||||
c.logger.Warn("failure when stopping post-unseal trace", "error", err)
|
||||
return
|
||||
}
|
||||
c.logger.Info("post-unseal trace completed", "file", traceFile)
|
||||
}
|
||||
}
|
||||
|
||||
// preSeal is invoked before the barrier is sealed, allowing
|
||||
// for any state teardown required.
|
||||
func (c *Core) preSeal() error {
|
||||
|
@ -7,20 +7,33 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
"github.com/hashicorp/vault/command/server"
|
||||
vaulthttp "github.com/hashicorp/vault/http"
|
||||
"github.com/hashicorp/vault/sdk/helper/testhelpers/schema"
|
||||
"github.com/hashicorp/vault/vault"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/net/http2"
|
||||
)
|
||||
|
||||
func TestSysPprof(t *testing.T) {
|
||||
t.Parallel()
|
||||
cluster := vault.NewTestCluster(t, nil, &vault.TestClusterOptions{
|
||||
|
||||
// trace test setup
|
||||
dir, err := os.MkdirTemp("", "vault-trace-test")
|
||||
require.NoError(t, err)
|
||||
|
||||
cluster := vault.NewTestCluster(t, &vault.CoreConfig{
|
||||
RawConfig: &server.Config{
|
||||
EnablePostUnsealTrace: true,
|
||||
PostUnsealTraceDir: dir,
|
||||
},
|
||||
}, &vault.TestClusterOptions{
|
||||
HandlerFunc: vaulthttp.Handler,
|
||||
RequestResponseCallback: schema.ResponseValidatingCallback(t),
|
||||
})
|
||||
@ -30,6 +43,14 @@ func TestSysPprof(t *testing.T) {
|
||||
core := cluster.Cores[0].Core
|
||||
vault.TestWaitActive(t, core)
|
||||
SysPprof_Test(t, cluster)
|
||||
|
||||
// draft trace test onto pprof one to avoid increasing test runtime with additional clusters
|
||||
files, err := os.ReadDir(dir)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, len(files), 0)
|
||||
traceFile, err := files[0].Info()
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, traceFile.Size(), int64(0))
|
||||
}
|
||||
|
||||
func TestSysPprof_MaxRequestDuration(t *testing.T) {
|
||||
|
@ -260,6 +260,17 @@ can have a negative effect on performance due to the tracking of each lock attem
|
||||
When `imprecise_lease_role_tracking` is set to true and a new role-based quota is enabled, subsequent lease counts start from 0.
|
||||
`imprecise_lease_role_tracking` affects role-based lease count quotas, but reduces latencies when not using role based quotas.
|
||||
|
||||
- `enable_post_unseal_trace` `(bool: false)` - Enables the server to generate a Go trace during the execution of the
|
||||
`core.postUnseal` function for debug purposes. The resulting trace can be viewed with the `go tool trace` command. The output
|
||||
directory can be specified with the `post_unseal_trace_directory` parameter. This should only be enabled temporarily for
|
||||
debugging purposes as it can have a significant performance impact. This can be updated on a running Vault process with a
|
||||
SIGHUP signal.
|
||||
|
||||
- `post_unseal_trace_directory` `(string: "")` - Specifies the directory where the trace file will be written, which must exist
|
||||
and be writable by the Vault process. If not specified it will create a subdirectory `vault-traces` under the result from
|
||||
[os.TempDir()](https://pkg.go.dev/os#TempDir) (usually `/tmp` on Unix systems). This can be updated on a running Vault process
|
||||
with a SIGHUP signal.
|
||||
|
||||
### High availability parameters
|
||||
|
||||
The following parameters are used on backends that support [high availability][high-availability].
|
||||
|
Loading…
x
Reference in New Issue
Block a user