feat: use 'localhost' endpoint for controlplane nodes

This switches the last remaining usage of the Kubernetes controlplane endpoint
to use `localhost` (the node itself) on controlplane nodes.

Worker nodes still use cluster-wide controlplane endpoint.

This allows controlplane nodes to boot fully even if the controlplane
endpoint (e.g. loadbalancer) doesn't function.

The process of joining etcd still requires either a discovery service or
a properly functioning controlplane endpoint.

With this fix, Talos controlplane nodes can boot successfully without a
loadbalancer being up, while worker nodes obviously won't join.

This improves Talos behavior in single-node clusters: when the controlplane
endpoint is not available, the node will still boot just fine and
function properly.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2023-01-10 20:46:31 +04:00
parent b0775ebf2c
commit 0a5a8802e7
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
4 changed files with 44 additions and 15 deletions

View File

@ -113,18 +113,7 @@ func (ctrl *NodeLabelsApplyController) getK8sClient(ctx context.Context, r contr
}
if machineType.MachineType().IsControlPlane() {
k8sRoot, err := safe.ReaderGet[*secrets.KubernetesRoot](ctx, r, resource.NewMetadata(secrets.NamespaceName, secrets.KubernetesRootType, secrets.KubernetesRootID, resource.VersionUndefined))
if err != nil {
if state.IsNotFoundError(err) {
return nil, nil
}
return nil, fmt.Errorf("failed to get kubernetes config: %w", err)
}
k8sRootSpec := k8sRoot.TypedSpec()
return kubernetes.NewTemporaryClientFromPKI(k8sRootSpec.CA, k8sRootSpec.Endpoint)
return kubernetes.NewTemporaryClientControlPlane(ctx, r)
}
logger.Debug("waiting for kubelet client config", zap.String("file", constants.KubeletKubeconfig))

View File

@ -7,6 +7,7 @@ package secrets
import (
"context"
"fmt"
"net/url"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
@ -84,7 +85,18 @@ func (ctrl *KubeletController) Run(ctx context.Context, r controller.Runtime, lo
}
func (ctrl *KubeletController) updateKubeletSecrets(cfgProvider talosconfig.Provider, kubeletSecrets *secrets.KubeletSpec) error {
kubeletSecrets.Endpoint = cfgProvider.Cluster().Endpoint()
if cfgProvider.Machine().Type().IsControlPlane() {
// use localhost endpoint for controlplane nodes
localEndpoint, err := url.Parse(fmt.Sprintf("https://localhost:%d", cfgProvider.Cluster().LocalAPIServerPort()))
if err != nil {
return err
}
kubeletSecrets.Endpoint = localEndpoint
} else {
// use cluster endpoint for workers
kubeletSecrets.Endpoint = cfgProvider.Cluster().Endpoint()
}
kubeletSecrets.CA = cfgProvider.Cluster().CA()

View File

@ -1441,7 +1441,11 @@ func UncordonNode(seq runtime.Sequence, data interface{}) (runtime.TaskExecution
if err = retry.Constant(5*time.Minute, retry.WithUnits(time.Second), retry.WithErrorLogging(true)).RetryWithContext(ctx,
func(ctx context.Context) error {
kubeHelper, err = kubernetes.NewClientFromKubeletKubeconfig()
if r.Config().Machine().Type().IsControlPlane() {
kubeHelper, err = kubernetes.NewTemporaryClientControlPlane(ctx, r.State().V1Alpha2().Resources())
} else {
kubeHelper, err = kubernetes.NewClientFromKubeletKubeconfig()
}
return retry.ExpectedError(err)
}); err != nil {
@ -1750,7 +1754,7 @@ func LabelNodeAsControlPlane(seq runtime.Sequence, data interface{}) (runtime.Ta
err = retry.Constant(constants.NodeReadyTimeout, retry.WithUnits(3*time.Second), retry.WithErrorLogging(true)).RetryWithContext(ctx, func(ctx context.Context) error {
var h *kubernetes.Client
h, err = kubernetes.NewTemporaryClientFromPKI(r.Config().Cluster().CA(), r.Config().Cluster().Endpoint())
h, err = kubernetes.NewTemporaryClientControlPlane(ctx, r.State().V1Alpha2().Resources())
if err != nil {
return err
}

View File

@ -14,6 +14,10 @@ import (
"net/url"
"time"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/crypto/x509"
"github.com/siderolabs/go-retry/retry"
"golang.org/x/sync/errgroup"
@ -32,6 +36,7 @@ import (
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/secrets"
)
const (
@ -135,6 +140,25 @@ func NewClientFromPKI(ca, crt, key []byte, endpoint *url.URL) (client *Client, e
}, nil
}
// NewTemporaryClientControlPlane builds a Kubernetes client for a controlplane node
// from the PKI material stored in the KubernetesRoot secrets resource.
//
// The client is pointed at the resource's LocalEndpoint rather than the cluster-wide
// endpoint, so it doesn't depend on the loadbalancer to be ready.
//
// If the KubernetesRoot resource is not found, both the returned client and the
// returned error are nil; callers must check the client for nil before using it.
// Any other lookup failure is returned wrapped.
func NewTemporaryClientControlPlane(ctx context.Context, r controller.Reader) (client *Client, err error) {
	rootMD := resource.NewMetadata(secrets.NamespaceName, secrets.KubernetesRootType, secrets.KubernetesRootID, resource.VersionUndefined)

	root, getErr := safe.ReaderGet[*secrets.KubernetesRoot](ctx, r, rootMD)

	switch {
	case getErr == nil:
		// Resource is available: hand its CA and local endpoint to the PKI-based constructor.
		spec := root.TypedSpec()

		return NewTemporaryClientFromPKI(spec.CA, spec.LocalEndpoint)
	case state.IsNotFoundError(getErr):
		// Resource is not there: this is reported as "no client" rather than as an error.
		return nil, nil
	default:
		return nil, fmt.Errorf("failed to get kubernetes config: %w", getErr)
	}
}
// NewTemporaryClientFromPKI initializes a Kubernetes client using a certificate
// with a TTL of 10 minutes.
func NewTemporaryClientFromPKI(ca *x509.PEMEncodedCertificateAndKey, endpoint *url.URL) (client *Client, err error) {