vault/limits/limiter.go
Mike Palmiotto e4a11ae7cd
Request Limiter Reload tests (#25126)
This PR introduces a new testonly endpoint for introspecting the
RequestLimiter state. It makes use of the endpoint to verify that changes to
the request_limiter config are honored across reload.

In the future, we may choose to make the sys/internal/request-limiter/status
endpoint available in normal binaries, but this is an expedient way to expose
the status for testing without having to rush the design.

In order to re-use as much of the existing command package utility functionality
as possible without introducing sprawling code changes, I introduced a new
server_util.go and exported some fields via accessors.

The tests shook out a couple of bugs (including a deadlock and a lack of
locking around the core limiterRegistry state).
2024-02-01 09:11:08 -05:00


// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package limits

import (
	"context"
	"errors"
	"fmt"
	"math"
	"sync/atomic"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-hclog"
	"github.com/platinummonkey/go-concurrency-limits/core"
	"github.com/platinummonkey/go-concurrency-limits/limit"
	"github.com/platinummonkey/go-concurrency-limits/limiter"
	"github.com/platinummonkey/go-concurrency-limits/strategy"
)
var (
// ErrCapacity is a new error type to indicate that Vault is not accepting new
// requests. This should be handled by callers in request paths to return
// http.StatusServiceUnavailable to the client.
ErrCapacity = errors.New("Vault server temporarily overloaded")
// DefaultDebugLogger opts out of the go-concurrency-limits internal Debug
// logger, since it's rather noisy. We're generating logs of interest in
// Vault.
DefaultDebugLogger limit.Logger = nil
// DefaultMetricsRegistry opts out of the go-concurrency-limits internal
// metrics because we're tracking what we care about in Vault.
DefaultMetricsRegistry core.MetricRegistry = core.EmptyMetricRegistryInstance
)
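
// As an illustrative sketch (not part of this package), a request-path caller
// might map ErrCapacity to an HTTP 503 as the doc comment above suggests,
// where handleRequest is a hypothetical helper:
//
//	if err := handleRequest(w, r); err != nil {
//		if errors.Is(err, ErrCapacity) {
//			http.Error(w, err.Error(), http.StatusServiceUnavailable)
//			return
//		}
//		http.Error(w, err.Error(), http.StatusInternalServerError)
//	}
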
const (
// Smoothing adjusts how heavily we weight newer high-latency detection.
// Higher values (>1) place more emphasis on recent measurements. We set
// this below 1 to better tolerate short-lived spikes in request rate.
DefaultSmoothing = .1
	// DefaultLongWindow is chosen as a minimum of 1000 samples. longWindow
	// defines the sliding window size used for the Exponential Moving Average.
DefaultLongWindow = 1000
)
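
// For intuition, an Exponential Moving Average with smoothing factor s is
// updated roughly as
//
//	ema = s*sample + (1-s)*ema
//
// so with DefaultSmoothing = 0.1 a single high-latency sample shifts the
// average only slightly, which is why short-lived spikes are tolerated. This
// is an illustrative approximation, not the exact Gradient2 update rule.
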
// RequestLimiter is a thin wrapper for limiter.DefaultLimiter.
type RequestLimiter struct {
*limiter.DefaultLimiter
Flags LimiterFlags
}
// Acquire consults the underlying RequestLimiter to see if a new
// RequestListener can be acquired.
//
// The return values are a *RequestListener, which the caller can use to perform
// latency measurements, and a bool to indicate whether or not a RequestListener
// was acquired.
//
// The returned RequestListener is short-lived and eventually garbage-collected;
// however, the RequestLimiter keeps track of in-flight concurrency using a
// token bucket implementation. The caller must release the resulting Limiter
// token by conducting a measurement.
//
// There are three return cases:
//
// 1) If Request Limiting is disabled, we return an empty RequestListener so all
// measurements are no-ops.
//
// 2) If the request limit has been exceeded, we will not acquire a
// RequestListener and instead return nil, false. No measurement is required,
// since we immediately return from callers with ErrCapacity.
//
// 3) If we have not exceeded the request limit, the caller must call one of
// OnSuccess(), OnDropped(), or OnIgnore() to return a measurement and release
// the underlying Limiter token.
func (l *RequestLimiter) Acquire(ctx context.Context) (*RequestListener, bool) {
// Transparently handle the case where the limiter is disabled.
if l == nil || l.DefaultLimiter == nil {
return &RequestListener{}, true
}
lsnr, ok := l.DefaultLimiter.Acquire(ctx)
if !ok {
		metrics.IncrCounter([]string{"limits", "concurrency", "service_unavailable"}, 1)
// If the token acquisition fails, we've reached capacity and we won't
// get a listener, so just return nil.
return nil, false
}
return &RequestListener{
DefaultListener: lsnr.(*limiter.DefaultListener),
released: new(atomic.Bool),
}, true
}
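
// An illustrative caller-side sketch of the three cases above (doWork and rl
// are hypothetical, not part of this package):
//
//	lsnr, ok := rl.Acquire(ctx)
//	if !ok {
//		// Case 2: over capacity; no measurement needed.
//		return ErrCapacity
//	}
//	// Cases 1 and 3: conduct a measurement (a no-op when limiting is disabled).
//	if err := doWork(ctx); err != nil {
//		lsnr.OnDropped()
//		return err
//	}
//	lsnr.OnSuccess()
//	return nil
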
// concurrencyChanger adjusts the current allowed concurrency with an
// exponential backoff as we approach the max limit.
func concurrencyChanger(limit int) int {
change := math.Sqrt(float64(limit))
if change < 1.0 {
change = 1.0
}
return int(change)
}
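
// For example, a current limit of 100 yields an adjustment step of 10, while a
// limit of 9 yields a step of 3, so the step size grows with the square root
// of the limit rather than linearly.
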
var DefaultLimiterFlags = map[string]LimiterFlags{
// WriteLimiter default flags have a less conservative MinLimit to prevent
// over-optimizing the request latency, which would result in
// under-utilization and client starvation.
WriteLimiter: {
MinLimit: 100,
MaxLimit: 5000,
InitialLimit: 100,
},
// SpecialPathLimiter default flags have a conservative MinLimit to allow
// more aggressive concurrency throttling for CPU-bound workloads such as
// `pki/issue`.
SpecialPathLimiter: {
MinLimit: 5,
MaxLimit: 5000,
InitialLimit: 5,
},
}
// LimiterFlags establish some initial configuration for a new request limiter.
type LimiterFlags struct {
// MinLimit defines the minimum concurrency floor to prevent over-throttling
// requests during periods of high traffic.
MinLimit int `json:"min_limit,omitempty" mapstructure:"min_limit,omitempty"`
// MaxLimit defines the maximum concurrency ceiling to prevent skewing to a
// point of no return.
//
// We set this to a high value (5000) with the expectation that systems with
// high-performing specs will tolerate higher limits, while the algorithm
// will find its own steady-state concurrency well below this threshold in
// most cases.
MaxLimit int `json:"max_limit,omitempty" mapstructure:"max_limit,omitempty"`
// InitialLimit defines the starting concurrency limit prior to any
// measurements.
//
// If we start this value off too high, Vault could become
// overloaded before the algorithm has a chance to adapt. Setting the value
// to the minimum is a safety measure which could result in early request
// rejection; however, the adaptive nature of the algorithm will prevent
// this from being a prolonged state as the allowed concurrency will
// increase during normal operation.
InitialLimit int `json:"initial_limit,omitempty" mapstructure:"initial_limit,omitempty"`
}
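
// Given the struct tags above, a LimiterFlags value of
// {MinLimit: 100, MaxLimit: 5000, InitialLimit: 100} would serialize to JSON
// (shown for illustration only) as:
//
//	{"min_limit":100,"max_limit":5000,"initial_limit":100}
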
// NewRequestLimiter is a basic constructor for the RequestLimiter wrapper. It
// is responsible for setting up the Gradient2 Limit and instantiating a new
// wrapped DefaultLimiter.
func NewRequestLimiter(logger hclog.Logger, name string, flags LimiterFlags) (*RequestLimiter, error) {
logger.Info("setting up new request limiter",
"initialLimit", flags.InitialLimit,
"maxLimit", flags.MaxLimit,
"minLimit", flags.MinLimit,
)
// NewGradient2Limit is the algorithm which drives request limiting
// decisions. It gathers latency measurements and calculates an Exponential
// Moving Average to determine whether latency deviation warrants a change
// in the current concurrency limit.
lim, err := limit.NewGradient2Limit(name,
flags.InitialLimit,
flags.MaxLimit,
flags.MinLimit,
concurrencyChanger,
DefaultSmoothing,
DefaultLongWindow,
DefaultDebugLogger,
DefaultMetricsRegistry,
)
if err != nil {
return &RequestLimiter{}, fmt.Errorf("failed to create gradient2 limit: %w", err)
}
strategy := strategy.NewSimpleStrategy(flags.InitialLimit)
defLimiter, err := limiter.NewDefaultLimiter(lim, 1e9, 1e9, 10, 100, strategy, nil, DefaultMetricsRegistry)
if err != nil {
return &RequestLimiter{}, err
}
return &RequestLimiter{Flags: flags, DefaultLimiter: defLimiter}, nil
}
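
// An illustrative end-to-end sketch (hypothetical caller, not part of this
// package): construct a write limiter from the package defaults, then gate
// work with Acquire as described above. A fuller caller would also call
// OnDropped or OnIgnore on failure paths.
//
//	rl, err := NewRequestLimiter(logger, WriteLimiter, DefaultLimiterFlags[WriteLimiter])
//	if err != nil {
//		return err
//	}
//	lsnr, ok := rl.Acquire(ctx)
//	if !ok {
//		return ErrCapacity
//	}
//	defer lsnr.OnSuccess()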