KV HWM Metrics (#11596) (#11725)

* Added kv HWM metrics and a local test

* Added go docs and some helper functions

* Just added kv secret hwm to endpoint for tracking

* Fixed some small things and added one more test

* Fix a race test

* Added require

Co-authored-by: divyaac <divya.chandrasekaran@hashicorp.com>
This commit is contained in:
Vault Automation 2026-01-13 10:46:02 -08:00 committed by GitHub
parent 879f7d1a7f
commit 42fa6ab32a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 298 additions and 6 deletions

View File

@ -13,11 +13,12 @@ const (
BillingSubPath = "billing/"
ReplicatedPrefix = "replicated/"
RoleHWMCountsHWM = "maxRoleCounts/"
KvHWMCountsHWM = "maxKvCounts/"
LocalPrefix = "local/"
BillingWriteInterval = 10 * time.Minute
)
// BillingMonthStorageFormat is the fmt layout used to build a monthly billing
// storage path from (prefix, year, month, metric). There is deliberately no
// "/" after the first verb because the prefixes (e.g. "replicated/") already
// end in one; the month is zero-padded to two digits.
// Example: replicated/2026/01/maxKvCounts/
var BillingMonthStorageFormat = "%s%d/%02d/%s"
type ConsumptionBilling struct {
// BillingStorageLock controls access to the billing storage paths
@ -32,7 +33,10 @@ type BillingConfig struct {
}
func GetMonthlyBillingPath(localPrefix string, now time.Time, billingMetric string) string {
// Normalize to avoid double slashes since our prefixes include trailing "/".
// Example: localPrefix="replicated/", billingMetric="maxKvCounts/" =>
// "replicated/2026/01/maxKvCounts/"
year := now.Year()
month := now.Month()
return fmt.Sprintf(localPrefix, month, year, billingMetric)
month := int(now.Month())
return fmt.Sprintf(BillingMonthStorageFormat, localPrefix, year, month, billingMetric)
}

View File

@ -0,0 +1,21 @@
// Copyright IBM Corp. 2016, 2025
// SPDX-License-Identifier: MPL-2.0
package billing
import (
"testing"
"time"
"github.com/stretchr/testify/require"
)
// TestGetMonthlyBillingPath verifies that GetMonthlyBillingPath returns the
// expected storage path for a given prefix, timestamp, and billing metric,
// including zero-padding of the month.
func TestGetMonthlyBillingPath(t *testing.T) {
	ts := time.Date(2026, time.January, 5, 12, 0, 0, 0, time.UTC)
	want := "replicated/2026/01/maxKvCounts/"
	got := GetMonthlyBillingPath(ReplicatedPrefix, ts, KvHWMCountsHWM)
	// require.Equal takes (t, expected, actual); keeping that order makes
	// failure output label the two values correctly.
	require.Equal(t, want, got)
}

View File

@ -84,16 +84,25 @@ func (c *Core) UpdateReplicatedHWMMetrics(ctx context.Context, currentMonth time
} else {
c.logger.Info("updated replicated hwm role counts", "prefix", billing.ReplicatedPrefix, "currentMonth", currentMonth)
}
if _, err = c.UpdateMaxKvCounts(ctx, billing.ReplicatedPrefix, currentMonth); err != nil {
// We won't return an error. Instead we will log the errors and attempt to continue
c.logger.Error("error updating replicated max kv counts", "error", err)
} else {
c.logger.Info("updated replicated max kv counts", "prefix", billing.ReplicatedPrefix, "currentMonth", currentMonth)
}
return nil
}
func (c *Core) UpdateLocalHWMMetrics(ctx context.Context, currentMonth time.Time) error {
_, err := c.UpdateMaxRoleCounts(ctx, billing.LocalPrefix, currentMonth)
if err != nil {
if _, err := c.UpdateMaxRoleCounts(ctx, billing.LocalPrefix, currentMonth); err != nil {
c.logger.Error("error updating local max role counts", "error", err)
// We won't return an error. Instead we will log the errors and attempt to continue
} else {
c.logger.Info("updated local max role counts", "prefix", billing.LocalPrefix, "currentMonth", currentMonth)
}
if _, err := c.UpdateMaxKvCounts(ctx, billing.LocalPrefix, currentMonth); err != nil {
c.logger.Error("error updating local max kv counts", "error", err)
} else {
c.logger.Info("updated local max kv counts", "prefix", billing.LocalPrefix, "currentMonth", currentMonth)
}
return nil
}

View File

@ -5,6 +5,7 @@ package vault
import (
"context"
"strconv"
"time"
"github.com/hashicorp/vault/sdk/logical"
@ -37,6 +38,83 @@ func combineRoleCounts(ctx context.Context, a, b *RoleCounts) *RoleCounts {
}
}
// storeMaxKvCountsLocked writes maxKvCounts, encoded as a base-10 string, to
// the billing sub-view under the monthly kv HWM path for the given prefix and
// month. It returns whatever error the storage Put returns.
//
// The caller must hold BillingStorageLock.
func (c *Core) storeMaxKvCountsLocked(ctx context.Context, maxKvCounts int, localPathPrefix string, month time.Time) error {
	encoded := strconv.Itoa(maxKvCounts)
	return c.GetBillingSubView().Put(ctx, &logical.StorageEntry{
		Key:   billing.GetMonthlyBillingPath(localPathPrefix, month, billing.KvHWMCountsHWM),
		Value: []byte(encoded),
	})
}
// getStoredMaxKvCountsLocked reads the stored kv HWM for the given prefix and
// month from the billing sub-view and decodes it as a base-10 integer.
//
// It returns (0, nil) when no entry exists, and (0, err) when the storage read
// or the integer decoding fails.
//
// The caller must hold BillingStorageLock.
func (c *Core) getStoredMaxKvCountsLocked(ctx context.Context, localPathPrefix string, month time.Time) (int, error) {
	key := billing.GetMonthlyBillingPath(localPathPrefix, month, billing.KvHWMCountsHWM)

	stored, err := c.GetBillingSubView().Get(ctx, key)
	switch {
	case err != nil:
		return 0, err
	case stored == nil:
		// Nothing has been recorded for this month yet.
		return 0, nil
	}

	count, parseErr := strconv.Atoi(string(stored.Value))
	if parseErr != nil {
		// Return an explicit zero rather than whatever Atoi produced alongside the error.
		return 0, parseErr
	}
	return count, nil
}
// GetStoredHWMKvCounts returns the kv HWM stored for the given prefix and
// month. It takes BillingStorageLock for reading around the lookup and
// delegates to getStoredMaxKvCountsLocked, so a missing entry yields (0, nil).
func (c *Core) GetStoredHWMKvCounts(ctx context.Context, localPathPrefix string, month time.Time) (int, error) {
	c.consumptionBilling.BillingStorageLock.RLock()
	defer c.consumptionBilling.BillingStorageLock.RUnlock()

	storedCount, err := c.getStoredMaxKvCountsLocked(ctx, localPathPrefix, month)
	return storedCount, err
}
// UpdateMaxKvCounts updates the HWM kv counts for the given month, and returns
// the value that was stored.
//
// It holds BillingStorageLock for writing for the whole operation, sums the
// current kv version 1 and version 2 secret counts across all namespaces, and
// compares that total with the stored HWM for currentMonth. A stored value of
// 0 is replaced by the current total; otherwise the stored value is only ever
// raised. The resulting value is always written back.
//
// When localPathPrefix equals billing.LocalPrefix only local mounts are
// counted; for any other prefix only non-local mounts are counted.
//
// On any failure the error is logged and (0, err) is returned.
func (c *Core) UpdateMaxKvCounts(ctx context.Context, localPathPrefix string, currentMonth time.Time) (int, error) {
	c.consumptionBilling.BillingStorageLock.Lock()
	defer c.consumptionBilling.BillingStorageLock.Unlock()

	onlyLocal := localPathPrefix == billing.LocalPrefix

	// Count each supported kv version in turn, keeping each version's own
	// failure message.
	perVersion := []struct {
		version    string
		failureMsg string
	}{
		{version: "1", failureMsg: "error getting count of kv version 1 secrets"},
		{version: "2", failureMsg: "error getting current count of kv version 2 secrets"},
	}
	current := 0
	for _, v := range perVersion {
		byNamespace, err := c.GetKvUsageMetricsByNamespace(ctx, v.version, "", onlyLocal, !onlyLocal)
		if err != nil {
			c.logger.Error(v.failureMsg, "error", err)
			return 0, err
		}
		current += getTotalSecretsAcrossAllNamespaces(byNamespace)
	}

	hwm, err := c.getStoredMaxKvCountsLocked(ctx, localPathPrefix, currentMonth)
	if err != nil {
		c.logger.Error("error getting stored max kv counts", "error", err)
		return 0, err
	}

	switch {
	case hwm == 0:
		// Nothing stored yet (or a stored zero): seed with the current total.
		hwm = current
	case current > hwm:
		c.logger.Info("updating max kv counts", "totalKvCounts", current, "maxKvCounts", hwm)
		hwm = current
	}

	if err := c.storeMaxKvCountsLocked(ctx, hwm, localPathPrefix, currentMonth); err != nil {
		c.logger.Error("error storing max kv counts", "error", err)
		return 0, err
	}
	return hwm, nil
}
// storeMaxRoleCountsLocked must be called with BillingStorageLock held
func (c *Core) storeMaxRoleCountsLocked(ctx context.Context, maxRoleCounts *RoleCounts, localPathPrefix string, month time.Time) error {
billingPath := billing.GetMonthlyBillingPath(localPathPrefix, month, billing.RoleHWMCountsHWM)

View File

@ -203,6 +203,9 @@ func TestHWMRoleCounts(t *testing.T) {
},
}
// Sleep to prevent race conditions during the role initialization
time.Sleep(1 * time.Second)
core.mountsLock.RLock()
defer core.mountsLock.RUnlock()
for _, tc := range testCases {
@ -387,11 +390,74 @@ func TestHWMRoleCounts(t *testing.T) {
}, counts)
}
// TestHWMKvSecretsCounts tests that we correctly store and track the HWM kv counts
// for both kv-v1 and kv-v2 mounts: the stored value rises as secrets are added
// and does not fall when a secret is deleted.
//
// The stored value is written by a periodic background update, so the checks
// that expect an increase poll for it instead of sleeping for exactly one
// update interval and racing the updater.
func TestHWMKvSecretsCounts(t *testing.T) {
	const (
		updateCadence = 3 * time.Second
		// Allow several update intervals before declaring failure.
		waitFor   = 5 * updateCadence
		pollEvery = 100 * time.Millisecond
	)

	coreConfig := &CoreConfig{
		LogicalBackends: roleLogicalBackends,
		BillingConfig: billing.BillingConfig{
			MetricsUpdateCadence: updateCadence,
		},
	}
	core, _, root := TestCoreUnsealedWithConfig(t, coreConfig)
	rootCtx := namespace.RootContext(context.Background())
	kvMounts := []string{"kv-v1", "kv-v2"}

	// hwmEquals returns a condition reporting whether the stored replicated
	// kv HWM for the current month equals want.
	hwmEquals := func(want int) func() bool {
		return func() bool {
			got, err := core.GetStoredHWMKvCounts(context.Background(), billing.ReplicatedPrefix, time.Now())
			return err == nil && got == want
		}
	}

	// Add 1 kv-v1 mount and 1 kv-v2 mount in the root namespace
	for _, mount := range kvMounts {
		req := logical.TestRequest(t, logical.CreateOperation, fmt.Sprintf("sys/mounts/%v", mount))
		req.Data["type"] = mount
		req.ClientToken = root
		_, err := core.HandleRequest(rootCtx, req)
		require.NoError(t, err)
	}

	// Add two secrets to each mount
	for _, mount := range kvMounts {
		for i := 0; i < 2; i++ {
			secretName := fmt.Sprintf("secret-%d", i)
			addKvSecretToStorage(t, rootCtx, core, mount, root, secretName, mount)
		}
	}

	// Verify that the max kv counts are as expected
	require.Eventually(t, hwmEquals(4), waitFor, pollEvery, "stored kv HWM never reached 4")

	// Add one more secret to the kv-v1 mount
	addKvSecretToStorage(t, rootCtx, core, "kv-v1", root, "secret-3", "kv-v1")

	// Verify that the max kv counts are updated
	require.Eventually(t, hwmEquals(5), waitFor, pollEvery, "stored kv HWM never reached 5")

	// Now delete one secret from the kv-v2 mount
	deleteKvSecretFromStorage(t, rootCtx, core, "kv-v2", root, "secret-1", "kv-v2")

	// Wait longer than one update interval so a metrics update has had the
	// chance to run after the delete.
	time.Sleep(updateCadence + time.Second)

	// Verify that the max kv counts are still the same
	counts, err := core.GetStoredHWMKvCounts(context.Background(), billing.ReplicatedPrefix, time.Now())
	require.NoError(t, err)
	require.Equal(t, 5, counts)
}
func addRoleToStorage(t *testing.T, core *Core, mount string, key string, numberOfKeys int) {
raw, ok := core.router.root.Get(mount + "/")
if !ok {
return
}
require.NotNil(t, raw)
re := raw.(*routeEntry)
storageView := re.storageView
@ -435,3 +501,39 @@ func deleteAllRolesFromStorage(t *testing.T, core *Core, mount string, key strin
require.NoError(t, err)
require.Len(t, list, 0)
}
// addKvSecretToStorage writes a secret named secretName to the KV mount at
// mount by sending an update request through core.HandleRequest with token as
// the client token. kvVersion selects the request shape and must be "kv-v1"
// or "kv-v2"; any other value fails the test. The test also fails if the
// request returns an error.
func addKvSecretToStorage(t *testing.T, ctx context.Context, core *Core, mount string, token string, secretName string, kvVersion string) {
	var (
		writePath string
		dataKey   string
		dataValue interface{}
	)
	switch kvVersion {
	case "kv-v2":
		// KV v2 expects writes to /data/<path> with a nested "data" payload
		writePath = fmt.Sprintf("%v/data/%s", mount, secretName)
		dataKey, dataValue = "data", map[string]interface{}{"foo": "bar"}
	case "kv-v1":
		// KV v1 expects writes directly to /<path> with a flat payload
		writePath = fmt.Sprintf("%v/%s", mount, secretName)
		dataKey, dataValue = "foo", "bar"
	default:
		t.Fatalf("invalid kv version: %s", kvVersion)
	}

	writeReq := logical.TestRequest(t, logical.UpdateOperation, writePath)
	writeReq.Data[dataKey] = dataValue
	writeReq.ClientToken = token

	_, err := core.HandleRequest(ctx, writeReq)
	require.NoError(t, err)
}
// deleteKvSecretFromStorage deletes the secret named secretName from the KV
// mount at mount by sending a delete request through core.HandleRequest with
// token as the client token. kvVersion must be "kv-v1" or "kv-v2" and selects
// the request path ("<mount>/<name>" or "<mount>/data/<name>"); any other
// value fails the test. The test also fails if the request returns an error.
func deleteKvSecretFromStorage(t *testing.T, ctx context.Context, core *Core, mount string, token string, secretName string, kvVersion string) {
	pathFormats := map[string]string{
		"kv-v2": "%v/data/%s",
		"kv-v1": "%v/%s",
	}
	pathFormat, known := pathFormats[kvVersion]
	if !known {
		t.Fatalf("invalid kv version: %s", kvVersion)
	}

	deleteReq := logical.TestRequest(t, logical.DeleteOperation, fmt.Sprintf(pathFormat, mount, secretName))
	deleteReq.ClientToken = token

	_, err := core.HandleRequest(ctx, deleteReq)
	require.NoError(t, err)
}

View File

@ -870,3 +870,58 @@ func (c *Core) GetRoleCounts() *RoleCounts {
func (c *Core) GetRoleCountsForCluster() *RoleCounts {
	// Resolve the primary status first, then delegate to the shared
	// implementation with the first flag fixed to true.
	primary := c.isPrimary()
	return c.getRoleCountsInternal(true, primary)
}
// GetKvUsageMetrics returns a map of namespace paths to KV secret counts.
// It is GetKvUsageMetricsByNamespace with no namespace filter and with both
// local and non-local mounts included; kvVersion is passed through unchanged.
func (c *Core) GetKvUsageMetrics(ctx context.Context, kvVersion string) (map[string]int, error) {
	const (
		allNamespaces     = ""
		includeLocal      = true
		includeReplicated = true
	)
	return c.GetKvUsageMetricsByNamespace(ctx, kvVersion, allNamespaces, includeLocal, includeReplicated)
}
// GetKvUsageMetricsByNamespace returns a map of namespace paths to KV secret
// counts, restricted by the given filters.
//
//   - kvVersion: "1" or "2" keeps only mounts of that version; "0" keeps all
//     mounts. Any other value returns an empty map and an error.
//   - nsPath: when non-empty, only mounts whose namespace path starts with
//     nsPath are counted.
//   - includeLocal / includeReplicated: whether mounts with Local set, and
//     mounts without it, are counted.
//
// Each remaining mount is walked with walkKvMountSecrets and its NumSecrets is
// added to the total for its namespace path (presumably populated by that
// walk; confirm against walkKvMountSecrets). If ctx is done before a mount is
// walked, the function returns (nil, error).
func (c *Core) GetKvUsageMetricsByNamespace(ctx context.Context, kvVersion string, nsPath string, includeLocal bool, includeReplicated bool) (map[string]int, error) {
	kvMounts := c.findKvMounts()
	totals := make(map[string]int)

	matchVersion := false
	switch kvVersion {
	case "1", "2":
		matchVersion = true
	case "0":
		// "0" means every kv version.
	default:
		return totals, fmt.Errorf("kv version %s not supported, must be 0, 1, or 2", kvVersion)
	}

	for _, m := range kvMounts {
		switch {
		case matchVersion && m.Version != kvVersion,
			m.Local && !includeLocal,
			!m.Local && !includeReplicated,
			nsPath != "" && !strings.HasPrefix(m.Namespace.Path, nsPath):
			continue
		}

		// Stop early if the caller's context has ended.
		select {
		case <-ctx.Done():
			return nil, fmt.Errorf("context expired")
		default:
		}

		c.walkKvMountSecrets(ctx, m)

		// Several mounts can share a namespace, so accumulate; a missing key
		// starts from zero.
		totals[m.Namespace.Path] += m.NumSecrets
	}
	return totals, nil
}

View File

@ -52,6 +52,7 @@ func (b *SystemBackend) useCaseConsumptionBillingPaths() []*framework.Path {
func (b *SystemBackend) handleUseCaseConsumption(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
// Get HWM role counts
replicatedMaxRoleCounts := &RoleCounts{}
replicatedKvHWMCounts := 0
var err error
currentMonth := time.Now()
previousMonth := timeutil.StartOfPreviousMonth(currentMonth)
@ -63,6 +64,10 @@ func (b *SystemBackend) handleUseCaseConsumption(ctx context.Context, req *logic
if err != nil {
return nil, fmt.Errorf("error retrieving replicated max role counts: %w", err)
}
replicatedKvHWMCounts, err = b.Core.UpdateMaxKvCounts(ctx, billing.ReplicatedPrefix, currentMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving replicated max kv counts: %w", err)
}
}
// We always want to get the local max role counts
@ -71,32 +76,50 @@ func (b *SystemBackend) handleUseCaseConsumption(ctx context.Context, req *logic
if err != nil {
return nil, fmt.Errorf("error retrieving local max role counts: %w", err)
}
localKvHWMCounts, err := b.Core.UpdateMaxKvCounts(ctx, billing.LocalPrefix, currentMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving local max kv counts: %w", err)
}
// If we are the primary, then combine the replicated and local max role counts. Else just output the local
// max role counts. replicatedMaxRoleCounts will be empty if we are not a primary, so this is taken care of for us.
combinedMaxRoleCounts := combineRoleCounts(ctx, replicatedMaxRoleCounts, localMaxRoleCounts)
combinedMaxKvCounts := replicatedKvHWMCounts + localKvHWMCounts
var replicatedPreviousMonthRoleCounts *RoleCounts
replicatedPreviousMonthKvHWMCounts := 0
if b.Core.isPrimary() {
replicatedPreviousMonthRoleCounts, err = b.Core.GetStoredHWMRoleCounts(ctx, billing.ReplicatedPrefix, previousMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving replicated max role counts for previous month: %w", err)
}
replicatedPreviousMonthKvHWMCounts, err = b.Core.GetStoredHWMKvCounts(ctx, billing.ReplicatedPrefix, previousMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving replicated max kv counts for previous month: %w", err)
}
}
localPreviousMonthRoleCounts, err := b.Core.GetStoredHWMRoleCounts(ctx, billing.LocalPrefix, previousMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving local max role counts for previous month: %w", err)
}
localPreviousMonthKvHWMCounts, err := b.Core.GetStoredHWMKvCounts(ctx, billing.LocalPrefix, previousMonth)
if err != nil {
return nil, fmt.Errorf("error retrieving local max kv counts for previous month: %w", err)
}
combinedPreviousMonthRoleCounts := combineRoleCounts(ctx, replicatedPreviousMonthRoleCounts, localPreviousMonthRoleCounts)
combinedPreviousMonthKvHWMCounts := replicatedPreviousMonthKvHWMCounts + localPreviousMonthKvHWMCounts
resp := map[string]interface{}{
"current_month": map[string]interface{}{
"timestamp": timeutil.StartOfMonth(currentMonth),
"maximum_role_counts": combinedMaxRoleCounts,
"maximum_kv_counts": combinedMaxKvCounts,
},
"previous_month": map[string]interface{}{
"timestamp": previousMonth,
"maximum_role_counts": combinedPreviousMonthRoleCounts,
"maximum_kv_counts": combinedPreviousMonthKvHWMCounts,
},
}