vault/sdk/logical/response_util.go
Mike Palmiotto 43be9fc18a
Request Limiter (#25093)
This commit introduces two new adaptive concurrency limiters in Vault,
which should handle overloading of the server during periods of
untenable request rate. The limiter adjusts the number of allowable
in-flight requests based on latency measurements performed across the
request duration. This approach allows us to reject entire requests
prior to doing any work and prevents clients from exceeding server
capacity.

The limiters intentionally target two separate vectors that have been
proven to lead to server over-utilization.

- Back pressure from the storage backend, resulting in bufferbloat in
  the WAL system. (enterprise)
- Back pressure from CPU over-utilization via PKI issue requests
  (specifically for RSA keys), resulting in failed heartbeats.

Storage constraints can be accounted for by limiting logical requests
according to their http.Method. We only limit requests with write-based
methods, since these will result in storage Puts and exhibit the
aforementioned bufferbloat.

CPU constraints are accounted for using the same underlying library and
technique; however, they require special treatment. The maximum number
of concurrent pki/issue requests found in testing (again, specifically
for RSA keys) is far lower than the minimum tolerable write request
rate. Without separate limiting, we would artificially impose limits on
tolerable request rates for non-PKI requests. To specifically target PKI
issue requests, we add a new PathsSpecial field, called limited,
allowing backends to specify a list of paths which should get
special-case request limiting.

For the sake of code cleanliness and future extensibility, we introduce
the concept of a LimiterRegistry. The registry proposed in this PR has
two entries, corresponding with the two vectors above. Each Limiter
entry has its own corresponding maximum and minimum concurrency,
allowing them to react to latency deviation independently and handle
high volumes of requests to targeted bottlenecks (CPU and storage).

In both cases, utilization will be effectively throttled before Vault
reaches any degraded state. The resulting 503 - Service Unavailable is a
retryable HTTP response code, which can be handled to gracefully retry
and eventually succeed. Clients should handle this by retrying with
jitter and exponential backoff. This is done within Vault's API, using
the go-retryablehttp library.

Limiter testing was performed via benchmarks of mixed workloads and
across a deployment of agent pods with great success.
2024-01-26 14:26:21 -05:00

221 lines
6.5 KiB
Go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0
package logical
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"github.com/hashicorp/errwrap"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/vault/sdk/helper/consts"
)
// RespondErrorCommon pulls most of the functionality from http's
// respondErrorCommon and some of http's handleLogical and makes it available
// to both the http package and elsewhere.
func RespondErrorCommon(req *Request, resp *Response, err error) (int, error) {
if err == nil && (resp == nil || !resp.IsError()) {
switch {
case req.Operation == ReadOperation || req.Operation == HeaderOperation:
if resp == nil {
return http.StatusNotFound, nil
}
// Basically: if we have empty "keys" or no keys at all, 404. This
// provides consistency with GET.
case req.Operation == ListOperation && (resp == nil || resp.WrapInfo == nil):
if resp == nil {
return http.StatusNotFound, nil
}
if len(resp.Data) == 0 {
if len(resp.Warnings) > 0 {
return 0, nil
}
return http.StatusNotFound, nil
}
keysRaw, ok := resp.Data["keys"]
if !ok || keysRaw == nil {
// If we don't have keys but have other data, return as-is
if len(resp.Data) > 0 || len(resp.Warnings) > 0 {
return 0, nil
}
return http.StatusNotFound, nil
}
var keys []string
switch keysRaw.(type) {
case []interface{}:
keys = make([]string, len(keysRaw.([]interface{})))
for i, el := range keysRaw.([]interface{}) {
s, ok := el.(string)
if !ok {
return http.StatusInternalServerError, nil
}
keys[i] = s
}
case []string:
keys = keysRaw.([]string)
default:
return http.StatusInternalServerError, nil
}
if len(keys) == 0 {
return http.StatusNotFound, nil
}
}
return 0, nil
}
if errwrap.ContainsType(err, new(ReplicationCodedError)) {
var allErrors error
var codedErr *ReplicationCodedError
errwrap.Walk(err, func(inErr error) {
// The Walk function does not just traverse leaves, and execute the
// callback function on the entire error first. So, if the error is
// of type multierror.Error, we may want to skip storing the entire
// error first to avoid adding duplicate errors when walking down
// the leaf errors
if _, ok := inErr.(*multierror.Error); ok {
return
}
newErr, ok := inErr.(*ReplicationCodedError)
if ok {
codedErr = newErr
} else {
// if the error is of type fmt.wrapError which is typically
// made by calling fmt.Errorf("... %w", err), allErrors will
// contain duplicated error messages
allErrors = multierror.Append(allErrors, inErr)
}
})
if allErrors != nil {
return codedErr.Code, multierror.Append(fmt.Errorf("errors from both primary and secondary; primary error was %v; secondary errors follow", codedErr.Msg), allErrors)
}
return codedErr.Code, errors.New(codedErr.Msg)
}
// Start out with internal server error since in most of these cases there
// won't be a response so this won't be overridden
statusCode := http.StatusInternalServerError
// If we actually have a response, start out with bad request
if resp != nil {
statusCode = http.StatusBadRequest
}
// Now, check the error itself; if it has a specific logical error, set the
// appropriate code
if err != nil {
switch {
case errwrap.ContainsType(err, new(StatusBadRequest)):
statusCode = http.StatusBadRequest
case errwrap.Contains(err, ErrPermissionDenied.Error()):
statusCode = http.StatusForbidden
case errwrap.Contains(err, consts.ErrInvalidWrappingToken.Error()):
statusCode = http.StatusBadRequest
case errwrap.Contains(err, ErrUnsupportedOperation.Error()):
statusCode = http.StatusMethodNotAllowed
case errwrap.Contains(err, ErrUnsupportedPath.Error()):
statusCode = http.StatusNotFound
case errwrap.Contains(err, ErrInvalidRequest.Error()):
statusCode = http.StatusBadRequest
case errwrap.Contains(err, ErrUpstreamRateLimited.Error()):
statusCode = http.StatusBadGateway
case errwrap.Contains(err, ErrRateLimitQuotaExceeded.Error()):
statusCode = http.StatusTooManyRequests
case errwrap.Contains(err, ErrLeaseCountQuotaExceeded.Error()):
statusCode = http.StatusTooManyRequests
case errwrap.Contains(err, ErrMissingRequiredState.Error()):
statusCode = http.StatusPreconditionFailed
case errwrap.Contains(err, ErrPathFunctionalityRemoved.Error()):
statusCode = http.StatusNotFound
case errwrap.Contains(err, ErrRelativePath.Error()):
statusCode = http.StatusBadRequest
case errwrap.Contains(err, ErrInvalidCredentials.Error()):
statusCode = http.StatusBadRequest
case errors.Is(err, ErrNotFound):
statusCode = http.StatusNotFound
}
}
if resp != nil && resp.IsError() {
err = fmt.Errorf("%s", resp.Data["error"].(string))
}
return statusCode, err
}
// AdjustErrorStatusCode adjusts the status that will be sent in error
// conditions in a way that can be shared across http's respondError and other
// locations.
func AdjustErrorStatusCode(status *int, err error) {
// Handle nested errors
if t, ok := err.(*multierror.Error); ok {
for _, e := range t.Errors {
AdjustErrorStatusCode(status, e)
}
}
// Adjust status code when sealed
if errwrap.Contains(err, consts.ErrSealed.Error()) {
*status = http.StatusServiceUnavailable
}
if errwrap.Contains(err, consts.ErrAPILocked.Error()) {
*status = http.StatusServiceUnavailable
}
// Adjust status code on
if errwrap.Contains(err, "http: request body too large") {
*status = http.StatusRequestEntityTooLarge
}
// Allow HTTPCoded error passthrough to specify a code
if t, ok := err.(HTTPCodedError); ok {
*status = t.Code()
}
}
func RespondError(w http.ResponseWriter, status int, err error) {
AdjustErrorStatusCode(&status, err)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
type ErrorResponse struct {
Errors []string `json:"errors"`
}
resp := &ErrorResponse{Errors: make([]string, 0, 1)}
if err != nil {
resp.Errors = append(resp.Errors, err.Error())
}
enc := json.NewEncoder(w)
enc.Encode(resp)
}
func RespondErrorAndData(w http.ResponseWriter, status int, data interface{}, err error) {
AdjustErrorStatusCode(&status, err)
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
type ErrorAndDataResponse struct {
Errors []string `json:"errors"`
Data interface{} `json:"data"`
}
resp := &ErrorAndDataResponse{Errors: make([]string, 0, 1)}
if err != nil {
resp.Errors = append(resp.Errors, err.Error())
}
resp.Data = data
enc := json.NewEncoder(w)
enc.Encode(resp)
}