fix: set proper timeouts for KubePrism loadbalancer

The default timeouts are very aggressive, and we should use explicit
timeouts so that health checks don't run that often.

Fixes #7690

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
Andrey Smirnov 2023-09-01 00:14:01 +04:00
parent b8fb55d5c2
commit 79bbdf454e
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 24 additions and 0 deletions

View File

@ -16,9 +16,11 @@ import (
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/slices"
"github.com/siderolabs/go-loadbalancer/controlplane"
"github.com/siderolabs/go-loadbalancer/upstream"
"github.com/siderolabs/go-pointer"
"go.uber.org/zap"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/k8s"
)
@ -190,6 +192,13 @@ func (ctrl *KubePrismController) startKubePrism(lbCfg *k8s.KubePrismConfig, logg
lb, err := controlplane.NewLoadBalancer(ctrl.balancerHost, ctrl.balancerPort,
logger.WithOptions(zap.IncreaseLevel(zap.ErrorLevel)), // silence the load balancer logs
controlplane.WithDialTimeout(constants.KubePrismDialTimeout),
controlplane.WithKeepAlivePeriod(constants.KubePrismKeepAlivePeriod),
controlplane.WithTCPUserTimeout(constants.KubePrismTCPUserTimeout),
controlplane.WithHealthCheckOptions(
upstream.WithHealthcheckInterval(constants.KubePrismHealthCheckInterval),
upstream.WithHealthcheckTimeout(constants.KubePrismHealthCheckTimeout),
),
)
if err != nil {
return fmt.Errorf("failed to create KubePrism: %w", err)

View File

@ -896,6 +896,21 @@ const (
// TcellMinimizeEnvironment is the environment variable to minimize tcell library memory usage (skips rune width calculation).
TcellMinimizeEnvironment = "TCELL_MINIMIZE=1"
// KubePrismDialTimeout is the timeout for the KubePrism loadbalancer dialing an endpoint.
KubePrismDialTimeout = 15 * time.Second
// KubePrismKeepAlivePeriod is the TCP keepalive period for the KubePrism loadbalancer.
KubePrismKeepAlivePeriod = 30 * time.Second
// KubePrismTCPUserTimeout is the TCP user timeout for the KubePrism loadbalancer.
KubePrismTCPUserTimeout = 30 * time.Second
// KubePrismHealthCheckInterval is the interval between health checks for the KubePrism loadbalancer.
KubePrismHealthCheckInterval = 20 * time.Second
// KubePrismHealthCheckTimeout is the timeout for health checks for the KubePrism loadbalancer.
KubePrismHealthCheckTimeout = 15 * time.Second
)
// See https://linux.die.net/man/3/klogctl