mirror of
https://github.com/siderolabs/sidero.git
synced 2026-05-12 16:36:10 +02:00
fix: use controller-runtime standard healthz endpoints
Fixes #717. `caps-controller-manager` has proper webhook support, so use that for readiness/liveness checks (the standard CAPI way). `sidero-controller-manager` doesn't have webhooks (we should fix it eventually!), so use the iPXE check instead. Also: * bump Talos to 0.14.1 * use Talos-provided default arguments for the agent environment (as the agent is running the Talos kernel, it makes sense). Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
c73d8e52b1
commit
e44f350d08
2
Makefile
2
Makefile
@ -9,7 +9,7 @@ MODULE := $(shell head -1 go.mod | cut -d' ' -f2)
|
||||
|
||||
ARTIFACTS := _out
|
||||
TEST_PKGS ?= ./...
|
||||
TALOS_RELEASE ?= v0.14.0-alpha.2
|
||||
TALOS_RELEASE ?= v0.14.1
|
||||
PREVIOUS_TALOS_RELEASE ?= v0.13.4
|
||||
DEFAULT_K8S_VERSION ?= v1.22.3
|
||||
|
||||
|
||||
@ -30,4 +30,16 @@ spec:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
ports:
|
||||
- containerPort: 9440
|
||||
name: healthz
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: healthz
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: healthz
|
||||
terminationGracePeriodSeconds: 10
|
||||
|
||||
@ -51,11 +51,13 @@ func init() {
|
||||
func main() {
|
||||
var (
|
||||
metricsAddr string
|
||||
healthAddr string
|
||||
enableLeaderElection bool
|
||||
webhookPort int
|
||||
)
|
||||
|
||||
flag.StringVar(&metricsAddr, "metrics-bind-addr", ":8080", "The address the metric endpoint binds to.")
|
||||
flag.StringVar(&healthAddr, "health-addr", ":9440", "The address the health endpoint binds to.")
|
||||
flag.BoolVar(&enableLeaderElection, "enable-leader-election", true,
|
||||
"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
|
||||
flag.IntVar(&webhookPort, "webhook-port", 9443, "Webhook Server port, disabled by default. When enabled, the manager will only work as webhook server, no reconcilers are installed.")
|
||||
@ -82,12 +84,13 @@ func main() {
|
||||
})
|
||||
|
||||
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
||||
Scheme: scheme,
|
||||
MetricsBindAddress: metricsAddr,
|
||||
LeaderElection: enableLeaderElection,
|
||||
LeaderElectionID: "controller-leader-election-capm",
|
||||
Port: webhookPort,
|
||||
EventBroadcaster: broadcaster,
|
||||
Scheme: scheme,
|
||||
MetricsBindAddress: metricsAddr,
|
||||
LeaderElection: enableLeaderElection,
|
||||
LeaderElectionID: "controller-leader-election-capm",
|
||||
Port: webhookPort,
|
||||
EventBroadcaster: broadcaster,
|
||||
HealthProbeBindAddress: healthAddr,
|
||||
})
|
||||
if err != nil {
|
||||
setupLog.Error(err, "unable to start manager")
|
||||
@ -110,7 +113,7 @@ func main() {
|
||||
mgr.GetScheme(),
|
||||
corev1.EventSource{Component: "caps-controller-manager"})
|
||||
|
||||
ctx := context.Background()
|
||||
ctx := ctrl.SetupSignalHandler()
|
||||
|
||||
if err = (&controllers.MetalClusterReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
@ -162,10 +165,24 @@ func main() {
|
||||
}
|
||||
// +kubebuilder:scaffold:builder
|
||||
|
||||
setupChecks(mgr)
|
||||
|
||||
setupLog.Info("starting manager")
|
||||
|
||||
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
|
||||
if err := mgr.Start(ctx); err != nil {
|
||||
setupLog.Error(err, "problem running manager")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func setupChecks(mgr ctrl.Manager) {
|
||||
if err := mgr.AddReadyzCheck("webhook", mgr.GetWebhookServer().StartedChecker()); err != nil {
|
||||
setupLog.Error(err, "unable to create ready check")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err := mgr.AddHealthzCheck("webhook", mgr.GetWebhookServer().StartedChecker()); err != nil {
|
||||
setupLog.Error(err, "unable to create health check")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,6 +82,9 @@ spec:
|
||||
- name: http
|
||||
containerPort: ${SIDERO_CONTROLLER_MANAGER_CONTAINER_API_PORT:=8081}
|
||||
protocol: TCP
|
||||
- containerPort: 9440
|
||||
name: healthz
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: API_ENDPOINT
|
||||
valueFrom:
|
||||
@ -96,14 +99,12 @@ spec:
|
||||
memory: 128Mi
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: http
|
||||
initialDelaySeconds: 15
|
||||
path: /readyz
|
||||
port: healthz
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: http
|
||||
initialDelaySeconds: 15
|
||||
port: healthz
|
||||
- command:
|
||||
- /siderolink-manager
|
||||
args:
|
||||
|
||||
@ -1,19 +0,0 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package healthz
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// RegisterServer mounts the health endpoint at /healthz on mux.
// It always returns nil.
func RegisterServer(mux *http.ServeMux) error {
	mux.Handle("/healthz", http.HandlerFunc(healthzHandler))

	return nil
}

// healthzHandler answers every request without writing anything, so the
// server replies with its default success status: the process is always
// considered healthy.
func healthzHandler(_ http.ResponseWriter, _ *http.Request) {
}
|
||||
@ -9,12 +9,14 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@ -23,9 +25,11 @@ import (
|
||||
"sigs.k8s.io/cluster-api/util/conditions"
|
||||
"sigs.k8s.io/cluster-api/util/patch"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/healthz"
|
||||
|
||||
"github.com/talos-systems/go-procfs/procfs"
|
||||
talosconstants "github.com/talos-systems/talos/pkg/machinery/constants"
|
||||
"github.com/talos-systems/talos/pkg/machinery/kernel"
|
||||
|
||||
infrav1 "github.com/talos-systems/sidero/app/caps-controller-manager/api/v1alpha3"
|
||||
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
|
||||
@ -369,23 +373,15 @@ func newEnvironment(server *metalv1alpha1.Server, serverBinding *infrav1.ServerB
|
||||
}
|
||||
|
||||
func newAgentEnvironment(arch string) *metalv1alpha1.Environment {
|
||||
args := []string{
|
||||
args := append([]string(nil), kernel.DefaultArgs...)
|
||||
args = append(args,
|
||||
"console=tty0",
|
||||
"console=ttyS0",
|
||||
"ima_appraise=fix",
|
||||
"ima_hash=sha512",
|
||||
"ima_template=ima-ng",
|
||||
"initrd=initramfs.xz",
|
||||
"ip=dhcp",
|
||||
"page_poison=1",
|
||||
"panic=30",
|
||||
"printk.devkmsg=on",
|
||||
"pti=on",
|
||||
"random.trust_cpu=on",
|
||||
"slab_nomerge=",
|
||||
"slub_debug=P",
|
||||
fmt.Sprintf("%s=%s:%d", constants.AgentEndpointArg, apiEndpoint, apiPort),
|
||||
}
|
||||
)
|
||||
|
||||
cmdline := procfs.NewCmdline(strings.Join(args, " "))
|
||||
extra := procfs.NewCmdline(extraAgentKernelArgs)
|
||||
@ -513,3 +509,33 @@ func markAsPXEBooted(server *metalv1alpha1.Server) error {
|
||||
Conditions: []clusterv1.ConditionType{metalv1alpha1.ConditionPXEBooted},
|
||||
})
|
||||
}
|
||||
|
||||
func Check(addr string) healthz.Checker {
|
||||
return func(_ *http.Request) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://%s/boot.ipxe", addr), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if resp.Body != nil {
|
||||
io.Copy(io.Discard, resp.Body) //nolint:errcheck
|
||||
resp.Body.Close() //nolint:errcheck
|
||||
}
|
||||
}()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("unexpected code %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -32,7 +32,6 @@ import (
|
||||
infrav1 "github.com/talos-systems/sidero/app/caps-controller-manager/api/v1alpha3"
|
||||
metalv1alpha1 "github.com/talos-systems/sidero/app/sidero-controller-manager/api/v1alpha1"
|
||||
"github.com/talos-systems/sidero/app/sidero-controller-manager/controllers"
|
||||
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/healthz"
|
||||
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/ipxe"
|
||||
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/metadata"
|
||||
"github.com/talos-systems/sidero/app/sidero-controller-manager/internal/power/api"
|
||||
@ -71,6 +70,7 @@ func init() {
|
||||
func main() {
|
||||
var (
|
||||
metricsAddr string
|
||||
healthAddr string
|
||||
apiEndpoint string
|
||||
apiPort int
|
||||
httpPort int
|
||||
@ -91,6 +91,7 @@ func main() {
|
||||
flag.IntVar(&apiPort, "api-port", 8081, "The TCP port Sidero components can be reached at from the servers.")
|
||||
flag.IntVar(&httpPort, "http-port", 8081, "The TCP port Sidero controller manager HTTP server is running.")
|
||||
flag.StringVar(&metricsAddr, "metrics-bind-addr", ":8081", "The address the metric endpoint binds to.")
|
||||
flag.StringVar(&healthAddr, "health-addr", ":9440", "The address the health endpoint binds to.")
|
||||
flag.StringVar(&extraAgentKernelArgs, "extra-agent-kernel-args", "", "A list of Linux kernel command line arguments to add to the agent environment kernel parameters (e.g. 'console=tty1 console=ttyS1').")
|
||||
flag.StringVar(&bootFromDiskMethod, "boot-from-disk-method", string(ipxe.BootIPXEExit), "Default method to use to boot server from disk if it hits iPXE endpoint after install.")
|
||||
flag.BoolVar(&enableLeaderElection, "enable-leader-election", true, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
|
||||
@ -150,11 +151,12 @@ func main() {
|
||||
api.DefaultDice = api.NewFailureDice(testPowerSimulatedExplicitFailureProb, testPowerSimulatedSilentFailureProb)
|
||||
|
||||
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
||||
Scheme: scheme,
|
||||
MetricsBindAddress: metricsAddr,
|
||||
LeaderElection: enableLeaderElection,
|
||||
LeaderElectionID: "controller-leader-election-sidero-controller-manager",
|
||||
Port: 9443,
|
||||
Scheme: scheme,
|
||||
MetricsBindAddress: metricsAddr,
|
||||
LeaderElection: enableLeaderElection,
|
||||
LeaderElectionID: "controller-leader-election-sidero-controller-manager",
|
||||
Port: 9443,
|
||||
HealthProbeBindAddress: healthAddr,
|
||||
})
|
||||
if err != nil {
|
||||
setupLog.Error(err, "unable to start manager")
|
||||
@ -177,7 +179,7 @@ func main() {
|
||||
mgr.GetScheme(),
|
||||
corev1.EventSource{Component: "sidero-controller-manager"})
|
||||
|
||||
ctx := context.Background()
|
||||
ctx := ctrl.SetupSignalHandler()
|
||||
|
||||
if err = (&controllers.EnvironmentReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
@ -212,6 +214,9 @@ func main() {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "ServerClass")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
setupChecks(mgr, httpPort)
|
||||
|
||||
// +kubebuilder:scaffold:builder
|
||||
|
||||
errCh := make(chan error)
|
||||
@ -242,13 +247,6 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
setupLog.Info("starting healthz server")
|
||||
|
||||
if err := healthz.RegisterServer(httpMux); err != nil {
|
||||
setupLog.Error(err, "unable to start healthz server", "controller", "Environment")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
setupLog.Info("starting internal API server")
|
||||
|
||||
apiRecorder := eventBroadcaster.NewRecorder(
|
||||
@ -283,7 +281,7 @@ func main() {
|
||||
setupLog.Info("starting manager and HTTP server")
|
||||
|
||||
go func() {
|
||||
err := mgr.Start(ctrl.SetupSignalHandler())
|
||||
err := mgr.Start(ctx)
|
||||
if err != nil {
|
||||
setupLog.Error(err, "problem running manager")
|
||||
}
|
||||
@ -326,3 +324,17 @@ func main() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setupChecks(mgr ctrl.Manager, httpPort int) {
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", httpPort)
|
||||
|
||||
if err := mgr.AddReadyzCheck("ipxe", ipxe.Check(addr)); err != nil {
|
||||
setupLog.Error(err, "unable to create ready check")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err := mgr.AddHealthzCheck("webhook", ipxe.Check(addr)); err != nil {
|
||||
setupLog.Error(err, "unable to create health check")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
2
go.mod
2
go.mod
@ -27,7 +27,7 @@ require (
|
||||
github.com/talos-systems/grpc-proxy v0.2.0
|
||||
github.com/talos-systems/net v0.3.1
|
||||
github.com/talos-systems/siderolink v0.1.1-0.20211130121818-9902ad2774f0
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1
|
||||
go.uber.org/zap v1.20.0
|
||||
golang.org/x/net v0.0.0-20220114011407-0dd24b26b47d
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
|
||||
|
||||
3
go.sum
3
go.sum
@ -663,8 +663,9 @@ github.com/talos-systems/siderolink v0.1.1-0.20211130121818-9902ad2774f0/go.mod
|
||||
github.com/talos-systems/talos/pkg/machinery v0.12.3/go.mod h1:qX77JMZawrDTQaJucqecdlFsHy+dbnZ9YL8Kw4qL7d4=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.13.0/go.mod h1:fQx1FlvFLSexSOYL1DSl0EjtazujlzNmVDCt2yRoLJ4=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0-alpha.1.0.20211118180932-1ffa8e048008/go.mod h1:D8NT4Aj+X2OpA6yK6RAtpw1wcgkDS7oD23vqOQWRiP8=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0 h1:UKk33z236rMWHsSMhu6ExlG1uB5dF7jws3qRDP+yycA=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0/go.mod h1:ctbMKkPJv8aiGfXT2NuWaoHch7fx62GaU81OVOyNVbc=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1 h1:ecvzW8OMlWxfdGsiL6cVwtEOd4IwIYTIgRaEEFxyuTc=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1/go.mod h1:ctbMKkPJv8aiGfXT2NuWaoHch7fx62GaU81OVOyNVbc=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
|
||||
|
||||
@ -22,8 +22,8 @@ require (
|
||||
github.com/talos-systems/go-retry v0.3.1
|
||||
github.com/talos-systems/net v0.3.1
|
||||
github.com/talos-systems/sidero v0.0.0-00010101000000-000000000000
|
||||
github.com/talos-systems/talos v0.14.0
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0
|
||||
github.com/talos-systems/talos v0.14.1
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1
|
||||
google.golang.org/grpc v1.43.0
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
|
||||
k8s.io/api v0.23.1
|
||||
|
||||
@ -1125,13 +1125,14 @@ github.com/talos-systems/net v0.3.1 h1:F9mlDgKE4XVfgpoRmacVUTEAMAeQ5xuOaeCl+A9a0
|
||||
github.com/talos-systems/net v0.3.1/go.mod h1:zhcGixNJz9dgwFiUwc7gkkAqdVqXagU1SNNoIVXYKGo=
|
||||
github.com/talos-systems/siderolink v0.1.0/go.mod h1:bEGwDYl9QgC3oZ4kdnJTuR2HX/XlUhxZjx/QAakKuBc=
|
||||
github.com/talos-systems/siderolink v0.1.1-0.20211130121818-9902ad2774f0/go.mod h1:bEGwDYl9QgC3oZ4kdnJTuR2HX/XlUhxZjx/QAakKuBc=
|
||||
github.com/talos-systems/talos v0.14.0 h1:3RyC7FgpQ5ZWjiyfz57u4qG/l0+pR52hDSJnGPxBgZg=
|
||||
github.com/talos-systems/talos v0.14.0/go.mod h1:UWuLwoQGsUHEddTqwfvxNOyYTOzd2n9KiV+pyfIJT5M=
|
||||
github.com/talos-systems/talos v0.14.1 h1:z6pr4mvs32DiG8M46tb88v2d9jBNLEBq6zxWgSlTC2U=
|
||||
github.com/talos-systems/talos v0.14.1/go.mod h1:GavvnvQzEp4A00+R8ecpd26TpXWVroRhKLavIK3P2fI=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.12.3/go.mod h1:qX77JMZawrDTQaJucqecdlFsHy+dbnZ9YL8Kw4qL7d4=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.13.0/go.mod h1:fQx1FlvFLSexSOYL1DSl0EjtazujlzNmVDCt2yRoLJ4=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0-alpha.1.0.20211118180932-1ffa8e048008/go.mod h1:D8NT4Aj+X2OpA6yK6RAtpw1wcgkDS7oD23vqOQWRiP8=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0 h1:UKk33z236rMWHsSMhu6ExlG1uB5dF7jws3qRDP+yycA=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.0/go.mod h1:ctbMKkPJv8aiGfXT2NuWaoHch7fx62GaU81OVOyNVbc=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1 h1:ecvzW8OMlWxfdGsiL6cVwtEOd4IwIYTIgRaEEFxyuTc=
|
||||
github.com/talos-systems/talos/pkg/machinery v0.14.1/go.mod h1:ctbMKkPJv8aiGfXT2NuWaoHch7fx62GaU81OVOyNVbc=
|
||||
github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user