fix: restore the StaticPodStatus resource

It got broken by the change which made the kubelet source static pods
from an internal HTTP server.

As we don't want it to break again, and to make health checks better, add
a new check to make sure the kubelet reports control plane static pods as
running. This, coupled with the API server check, should make the health
checks more thorough.

Also add logging when static pod definitions are updated (such logs
previously existed in the file-based implementation). These logs are very
helpful for troubleshooting.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2022-10-31 17:37:07 +04:00
parent 1947092ae2
commit 0b41923c36
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
11 changed files with 204 additions and 110 deletions

4
go.mod
View File

@ -40,7 +40,7 @@ require (
github.com/containernetworking/plugins v1.1.1
github.com/coreos/go-iptables v0.6.0
github.com/coreos/go-semver v0.3.0
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548
github.com/cosi-project/runtime v0.2.0-alpha.2
github.com/docker/distribution v2.8.1+incompatible
github.com/docker/docker v20.10.20+incompatible
github.com/docker/go-connections v0.4.0
@ -199,7 +199,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
github.com/hashicorp/go-memdb v1.3.3 // indirect
github.com/hashicorp/go-memdb v1.3.4 // indirect
github.com/hashicorp/go-safetemp v1.0.0 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect

8
go.sum
View File

@ -335,8 +335,8 @@ github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzA
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548 h1:/CMoJlmVdr1XrAoo4cQDPF4rwB2Ap1WCa/BlFfkqOW0=
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548/go.mod h1:u60xdQ7/f8WkO0qsDwPRJuy+0edCDYvwh2xYUvDO+no=
github.com/cosi-project/runtime v0.2.0-alpha.2 h1:B27kw7knAukaHiXwKP51hGh4q2RMaY+wPF+beXBjZMI=
github.com/cosi-project/runtime v0.2.0-alpha.2/go.mod h1:jv79UECqzQaAeVwbdawUGmEBCX0fl7J7JBysnOYIM4U=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
@ -651,8 +651,8 @@ github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjh
github.com/hashicorp/go-immutable-radix v1.3.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc=
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-memdb v1.3.3 h1:oGfEWrFuxtIUF3W2q/Jzt6G85TrMk9ey6XfYLvVe1Wo=
github.com/hashicorp/go-memdb v1.3.3/go.mod h1:uBTr1oQbtuMgd1SSGoR8YV27eT3sBHbYiNm53bMpgSg=
github.com/hashicorp/go-memdb v1.3.4 h1:XSL3NR682X/cVk2IeV0d70N4DZ9ljI885xAEU8IoK3c=
github.com/hashicorp/go-memdb v1.3.4/go.mod h1:uBTr1oQbtuMgd1SSGoR8YV27eT3sBHbYiNm53bMpgSg=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-multierror v0.0.0-20161216184304-ed905158d874/go.mod h1:JMRHfdO9jKNzS/+BTlxCjKNQHg/jZAft8U7LloJvN7I=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=

View File

@ -155,8 +155,6 @@ func (ctrl *KubeletStaticPodController) Run(ctx context.Context, r controller.Ru
nodename := nodenameResource.(*k8s.Nodename).TypedSpec().Nodename
// render static pods first, and attempt to build kubelet client last,
// as if kubelet issues certs from the API server, API server should be launched first.
kubeletClient, err = kubelet.NewClient(nodename, secrets.APIServerKubeletClient.Crt, secrets.APIServerKubeletClient.Key, rootSecrets.CA.Crt)
if err != nil {
return fmt.Errorf("error building kubelet client: %w", err)
@ -189,12 +187,18 @@ func (ctrl *KubeletStaticPodController) refreshPodStatus(ctx context.Context, r
podsSeen := map[string]struct{}{}
for _, pod := range podList.Items {
if pod.Metadata.Annotations.ConfigSource != "file" {
pod := pod
switch pod.Metadata.Annotations.ConfigSource {
case "file":
// static pod from a file source
case "http":
// static pod from an HTTP source
default:
// anything else is not a static pod, skip it
continue
}
pod := pod
statusID := fmt.Sprintf("%s/%s", pod.Metadata.Namespace, pod.Metadata.Name)
podsSeen[statusID] = struct{}{}

View File

@ -13,6 +13,7 @@ import (
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"go.uber.org/zap"
"gopkg.in/yaml.v3"
@ -23,6 +24,8 @@ import (
type StaticPodServerController struct {
podList []byte
podListMu sync.Mutex
staticPodVersions map[string]string
}
// Name implements controller.Controller interface.
@ -65,6 +68,8 @@ type podList struct {
//
//nolint:gocyclo,cyclop
func (ctrl *StaticPodServerController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
ctrl.staticPodVersions = map[string]string{}
shutdownServer, serverError, err := ctrl.createServer(ctx, r, logger)
if err != nil {
return fmt.Errorf("failed to start http server to serve static pod list: %w", err)
@ -79,7 +84,7 @@ func (ctrl *StaticPodServerController) Run(ctx context.Context, r controller.Run
case err := <-serverError:
return fmt.Errorf("http server closed unexpectedly: %w", err)
case <-r.EventCh():
staticPodList, err := buildPodList(ctx, r)
staticPodList, err := ctrl.buildPodList(ctx, r, logger)
if err != nil {
logger.Error("error building static pod list", zap.Error(err))
}
@ -91,8 +96,8 @@ func (ctrl *StaticPodServerController) Run(ctx context.Context, r controller.Run
}
}
func buildPodList(ctx context.Context, r controller.Runtime) ([]byte, error) {
staticPods, err := r.List(ctx, resource.NewMetadata(k8s.NamespaceName, k8s.StaticPodType, "", resource.VersionUndefined))
func (ctrl *StaticPodServerController) buildPodList(ctx context.Context, r controller.Runtime, logger *zap.Logger) ([]byte, error) {
staticPods, err := safe.ReaderList[*k8s.StaticPod](ctx, r, resource.NewMetadata(k8s.NamespaceName, k8s.StaticPodType, "", resource.VersionUndefined))
if err != nil {
return nil, fmt.Errorf("error listing static pods: %w", err)
}
@ -102,10 +107,37 @@ func buildPodList(ctx context.Context, r controller.Runtime) ([]byte, error) {
APIVersion: "v1",
}
for _, staticPod := range staticPods.Items {
staticPodSpec := staticPod.(*k8s.StaticPod).TypedSpec()
touchedPodIDs := map[string]struct{}{}
for iter := safe.IteratorFromList(staticPods); iter.Next(); {
id := iter.Value().Metadata().ID()
version := iter.Value().Metadata().Version().String()
if oldVersion, exists := ctrl.staticPodVersions[id]; !exists || oldVersion != version {
ctrl.staticPodVersions[id] = version
if !exists {
logger.Info("rendered new static pod", zap.String("id", id))
} else {
logger.Info("rendered updated static pod", zap.String("id", id), zap.String("old_version", oldVersion), zap.String("new_version", version))
}
}
staticPodSpec := iter.Value().TypedSpec()
pl.Items = append(pl.Items, staticPodSpec.Pod)
touchedPodIDs[id] = struct{}{}
}
for id := range ctrl.staticPodVersions {
if _, exists := touchedPodIDs[id]; exists {
continue
}
logger.Info("removed static pod", zap.String("id", id))
delete(ctrl.staticPodVersions, id)
}
manifestContent, err := yaml.Marshal(pl)

View File

@ -11,7 +11,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
"gopkg.in/yaml.v3"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/digitalocean"
)

View File

@ -1,96 +1,96 @@
addresses:
- address: 128.199.52.32/19
linkName: eth0
family: inet4
scope: global
flags: permanent
layer: platform
- address: 2a03:b0c0:2:d0::1478:3001/64
linkName: eth0
family: inet6
scope: global
flags: permanent
layer: platform
- address: 10.18.0.5/16
linkName: eth0
family: inet4
scope: global
flags: permanent
layer: platform
- address: 10.133.0.2/16
linkName: eth1
family: inet4
scope: global
flags: permanent
layer: platform
- address: 128.199.52.32/19
linkName: eth0
family: inet4
scope: global
flags: permanent
layer: platform
- address: 2a03:b0c0:2:d0::1478:3001/64
linkName: eth0
family: inet6
scope: global
flags: permanent
layer: platform
- address: 10.18.0.5/16
linkName: eth0
family: inet4
scope: global
flags: permanent
layer: platform
- address: 10.133.0.2/16
linkName: eth1
family: inet4
scope: global
flags: permanent
layer: platform
links:
- name: eth0
logical: false
up: true
mtu: 0
kind: ""
type: netrom
layer: platform
- name: eth1
logical: false
up: true
mtu: 0
kind: ""
type: netrom
layer: platform
- name: eth0
logical: false
up: true
mtu: 0
kind: ""
type: netrom
layer: platform
- name: eth1
logical: false
up: true
mtu: 0
kind: ""
type: netrom
layer: platform
routes:
- family: inet4
dst: ""
src: ""
gateway: 128.199.32.1
outLinkName: eth0
table: main
priority: 1024
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
- family: inet4
dst: 169.254.169.254/32
src: ""
gateway: 128.199.32.1
outLinkName: eth0
table: main
priority: 512
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
- family: inet6
dst: ""
src: ""
gateway: 2a03:b0c0:2:d0::1
outLinkName: eth0
table: main
priority: 1024
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
- family: inet4
dst: ""
src: ""
gateway: 128.199.32.1
outLinkName: eth0
table: main
priority: 1024
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
- family: inet4
dst: 169.254.169.254/32
src: ""
gateway: 128.199.32.1
outLinkName: eth0
table: main
priority: 512
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
- family: inet6
dst: ""
src: ""
gateway: 2a03:b0c0:2:d0::1
outLinkName: eth0
table: main
priority: 1024
scope: global
type: unicast
flags: ""
protocol: static
layer: platform
hostnames:
- hostname: debian-s-1vcpu-512mb-10gb-ams3-01
domainname: ""
layer: platform
- hostname: debian-s-1vcpu-512mb-10gb-ams3-01
domainname: ""
layer: platform
resolvers:
- dnsServers:
- 67.207.67.2
- 67.207.67.3
layer: platform
- dnsServers:
- 67.207.67.2
- 67.207.67.3
layer: platform
timeServers: []
operators: []
externalIPs:
- 128.199.52.32
- 128.199.52.32
metadata:
platform: digital-ocean
hostname: debian-s-1vcpu-512mb-10gb-ams3-01
region: ams3
instanceId: "320206672"
providerId: digitalocean://320206672
platform: digital-ocean
hostname: debian-s-1vcpu-512mb-10gb-ams3-01
region: ams3
instanceId: "320206672"
providerId: digitalocean://320206672

View File

@ -71,6 +71,13 @@ func DefaultClusterChecks() []ClusterCheck {
}, 10*time.Minute, 5*time.Second)
},
// wait for k8s control plane static pods
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
return K8sControlPlaneStaticPods(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},
// wait for HA k8s control plane
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {

View File

@ -9,13 +9,19 @@ import (
"context"
"fmt"
"net/netip"
"strings"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/siderolabs/gen/maps"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/talos-systems/talos/pkg/cluster"
"github.com/talos-systems/talos/pkg/machinery/client"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/talos-systems/talos/pkg/machinery/constants"
"github.com/talos-systems/talos/pkg/machinery/resources/k8s"
)
// K8sAllNodesReportedAssertion checks whether all the nodes show up in node list.
@ -379,3 +385,47 @@ func ReplicaSetPresent(ctx context.Context, cluster cluster.K8sProvider, namespa
return len(rss.Items) > 0, nil
}
// K8sControlPlaneStaticPods checks whether all the controlplane nodes are running required Kubernetes static pods.
//
// For every node of type init or controlplane it lists the StaticPodStatus
// resources through the Talos COSI API (Talos 1.2+ required) and verifies that
// kube-apiserver, kube-controller-manager and kube-scheduler each have a status
// resource whose ID starts with the expected "namespace/name" prefix.
//
// Note that only the presence of the status resource is verified here; the
// contents of the status are not inspected.
//
// It returns an error if the client can't be obtained, if listing fails on a
// node, or for the first node which is missing any of the expected static pods.
//
//nolint:gocyclo,cyclop
func K8sControlPlaneStaticPods(ctx context.Context, cl ClusterInfo) error {
	initNodes := cl.NodesByType(machine.TypeInit)
	controlPlaneNodes := cl.NodesByType(machine.TypeControlPlane)

	// Cap the capacity of the first slice so that append has to copy into a fresh
	// backing array instead of writing into memory owned by the cluster info.
	expectedNodes := append(initNodes[:len(initNodes):len(initNodes)], controlPlaneNodes...)

	// this uses the Talos COSI API, so Talos 1.2+ is required
	c, err := cl.Client()
	if err != nil {
		return err
	}

	for _, node := range expectedNodes {
		// the set is rebuilt for each node, as found entries are removed from it below
		expectedStaticPods := map[string]struct{}{
			"kube-system/kube-apiserver":          {},
			"kube-system/kube-controller-manager": {},
			"kube-system/kube-scheduler":          {},
		}

		items, err := safe.StateList[*k8s.StaticPodStatus](
			client.WithNode(ctx, node.InternalIP.String()),
			c.COSI,
			resource.NewMetadata(k8s.NamespaceName, k8s.StaticPodStatusType, "", resource.VersionUndefined),
		)
		if err != nil {
			return fmt.Errorf("error listing static pods on node %s: %w", node.InternalIP, err)
		}

		for iter := safe.IteratorFromList(items); iter.Next(); {
			// match by prefix: the status ID is only required to start with "namespace/name"
			for expectedStaticPod := range expectedStaticPods {
				if strings.HasPrefix(iter.Value().Metadata().ID(), expectedStaticPod) {
					delete(expectedStaticPods, expectedStaticPod)
				}
			}
		}

		// anything left in the set was not reported for this node
		if len(expectedStaticPods) > 0 {
			return fmt.Errorf("missing static pods on node %s: %v", node.InternalIP, maps.Keys(expectedStaticPods))
		}
	}

	return nil
}

View File

@ -102,6 +102,6 @@ type Metadata struct {
// Annotations are the annotations on a pod.
type Annotations struct {
// ConfigSource indicates where the resource is coming from.
// Its value is "file" for static pods and "api" for resources came from kube-apiserver.
// Its value is "file"/"http" for static pods and "api" for resources that came from kube-apiserver.
ConfigSource string `json:"kubernetes.io/config.source"`
}

View File

@ -8,7 +8,7 @@ replace gopkg.in/yaml.v3 => github.com/unix4ever/yaml v0.0.0-20220527175918-f17b
require (
github.com/containerd/go-cni v1.1.7
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548
github.com/cosi-project/runtime v0.2.0-alpha.2
github.com/dustin/go-humanize v1.0.0
github.com/evanphx/json-patch v5.6.0+incompatible
github.com/ghodss/yaml v1.0.0
@ -61,7 +61,7 @@ require (
go.uber.org/zap v1.23.0 // indirect
golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect
golang.org/x/net v0.0.0-20220923203811-8be639271d50 // indirect
golang.org/x/sync v0.0.0-20220923202941-7f9b1623fab7 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec // indirect
golang.org/x/text v0.3.8 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect

View File

@ -22,8 +22,8 @@ github.com/containerd/go-cni v1.1.7 h1:1yKpVCQAXI21BJIy8q7Nyk4CWpIgUno6ib7JIDca7
github.com/containerd/go-cni v1.1.7/go.mod h1:Ve4Q0RB2Bw78D90OL0YVyDjqdTL7FKh9W+UPbhWiZXA=
github.com/containernetworking/cni v1.1.2 h1:wtRGZVv7olUHMOqouPpn3cXJWpJgM6+EUl31EQbXALQ=
github.com/containernetworking/cni v1.1.2/go.mod h1:sDpYKmGVENF3s6uvMvGgldDWeG8dMxakj/u+i9ht9vw=
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548 h1:/CMoJlmVdr1XrAoo4cQDPF4rwB2Ap1WCa/BlFfkqOW0=
github.com/cosi-project/runtime v0.2.0-alpha.1.0.20221009084302-e8a8fdcc7548/go.mod h1:u60xdQ7/f8WkO0qsDwPRJuy+0edCDYvwh2xYUvDO+no=
github.com/cosi-project/runtime v0.2.0-alpha.2 h1:B27kw7knAukaHiXwKP51hGh4q2RMaY+wPF+beXBjZMI=
github.com/cosi-project/runtime v0.2.0-alpha.2/go.mod h1:jv79UECqzQaAeVwbdawUGmEBCX0fl7J7JBysnOYIM4U=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@ -198,8 +198,9 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220923202941-7f9b1623fab7 h1:ZrnxWX62AgTKOSagEqxvb3ffipvEDX2pl7E1TdqLqIc=
golang.org/x/sync v0.0.0-20220923202941-7f9b1623fab7/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=