mirror of
https://github.com/cloudnativelabs/kube-router.git
synced 2025-10-07 16:01:08 +02:00
more work on healthchecks
This commit is contained in:
parent
77cb340cfd
commit
e53aef280c
@ -1,6 +1,7 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
@ -42,15 +43,23 @@ func sendHeartBeat(channel chan<- *ControllerHeartbeat, controller string) {
|
||||
func (hc *HealthController) Handler(w http.ResponseWriter, req *http.Request) {
|
||||
if hc.Status.Healthy {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("These aren't the droids you're looking for\n"))
|
||||
w.Write([]byte("OK\n"))
|
||||
} else {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
w.Write([]byte("These are the droids you're looking for\n"))
|
||||
statusText := fmt.Sprintf("Service controller last alive %s\n ago"+
|
||||
"Routing controller last alive: %s\n ago"+
|
||||
"Policy controller last alive: %s\n ago"+
|
||||
"Metrics controller last alive: %s\n ago",
|
||||
time.Since(hc.Status.NetworkServicesControllerAlive),
|
||||
time.Since(hc.Status.NetworkRoutingControllerAlive),
|
||||
time.Since(hc.Status.NetworkPolicyControllerAlive),
|
||||
time.Since(hc.Status.MetricsControllerAlive))
|
||||
w.Write([]byte(statusText))
|
||||
}
|
||||
}
|
||||
|
||||
func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
|
||||
glog.Infof("Received heartbeat from %s", beat.Component)
|
||||
glog.V(3).Infof("Received heartbeat from %s", beat.Component)
|
||||
switch component := beat.Component; component {
|
||||
case "NSC":
|
||||
hc.Status.NetworkServicesControllerAlive = time.Now()
|
||||
@ -64,22 +73,35 @@ func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
|
||||
}
|
||||
|
||||
func (hc *HealthController) CheckHealth() bool {
|
||||
glog.V(4).Info("Checking components")
|
||||
health := true
|
||||
if time.Since(hc.Status.NetworkPolicyControllerAlive) > hc.Config.IPTablesSyncPeriod+3*time.Second {
|
||||
glog.Error("Network Policy Controller heartbeat timeout")
|
||||
health = false
|
||||
if hc.Config.RunFirewall {
|
||||
if time.Since(hc.Status.NetworkPolicyControllerAlive) > hc.Config.IPTablesSyncPeriod+3*time.Second {
|
||||
glog.Error("Network Policy Controller heartbeat missed")
|
||||
health = false
|
||||
}
|
||||
}
|
||||
|
||||
if time.Since(hc.Status.NetworkRoutingControllerAlive) > hc.Config.RoutesSyncPeriod+3*time.Second {
|
||||
glog.Error("Network Routing Controller heartbeat timeout")
|
||||
health = false
|
||||
if hc.Config.RunRouter {
|
||||
if time.Since(hc.Status.NetworkRoutingControllerAlive) > hc.Config.RoutesSyncPeriod+3*time.Second {
|
||||
glog.Error("Network Routing Controller heartbeat missed")
|
||||
health = false
|
||||
}
|
||||
}
|
||||
|
||||
if time.Since(hc.Status.NetworkServicesControllerAlive) > hc.Config.IpvsSyncPeriod+3*time.Second {
|
||||
glog.Error("NetworkService Controller heartbeat timeout")
|
||||
health = false
|
||||
if hc.Config.RunServiceProxy {
|
||||
if time.Since(hc.Status.NetworkServicesControllerAlive) > hc.Config.IpvsSyncPeriod+3*time.Second {
|
||||
glog.Error("NetworkService Controller heartbeat missed")
|
||||
health = false
|
||||
}
|
||||
}
|
||||
|
||||
if hc.Config.MetricsEnabled {
|
||||
if time.Since(hc.Status.MetricsControllerAlive) > 3*time.Second {
|
||||
glog.Error("Metrics Controller heartbeat missed")
|
||||
health = false
|
||||
}
|
||||
}
|
||||
|
||||
return health
|
||||
}
|
||||
|
||||
@ -90,7 +112,6 @@ func (hc *HealthController) Run(healthChan <-chan *ControllerHeartbeat, stopCh <
|
||||
|
||||
srv := &http.Server{Addr: ":" + strconv.Itoa(int(hc.HealthPort)), Handler: http.DefaultServeMux}
|
||||
|
||||
// register the health check handler on the /healthz path
|
||||
http.HandleFunc("/healthz", hc.Handler)
|
||||
|
||||
go func() {
|
||||
|
@ -121,13 +121,13 @@ func (npc *NetworkPolicyController) Run(healthChan chan<- *ControllerHeartbeat,
|
||||
err := npc.Sync()
|
||||
if err != nil {
|
||||
glog.Errorf("Error during periodic sync: " + err.Error())
|
||||
} else {
|
||||
sendHeartBeat(healthChan, "NPC")
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
sendHeartBeat(healthChan, "NPC")
|
||||
|
||||
select {
|
||||
case <-stopCh:
|
||||
glog.Infof("Shutting down network policies controller")
|
||||
|
@ -130,13 +130,16 @@ func (nsc *NetworkServicesController) Run(healthChan chan<- *ControllerHeartbeat
|
||||
|
||||
if watchers.PodWatcher.HasSynced() && watchers.NetworkPolicyWatcher.HasSynced() {
|
||||
glog.V(1).Info("Performing periodic sync of ipvs services")
|
||||
nsc.sync()
|
||||
err := nsc.sync()
|
||||
if err != nil {
|
||||
glog.Errorf("Error during periodic ipvs sync: " + err.Error())
|
||||
} else {
|
||||
sendHeartBeat(healthChan, "NSC")
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
sendHeartBeat(healthChan, "NSC")
|
||||
|
||||
select {
|
||||
case <-stopCh:
|
||||
glog.Info("Shutting down network services controller")
|
||||
@ -146,20 +149,28 @@ func (nsc *NetworkServicesController) Run(healthChan chan<- *ControllerHeartbeat
|
||||
}
|
||||
}
|
||||
|
||||
func (nsc *NetworkServicesController) sync() {
|
||||
func (nsc *NetworkServicesController) sync() error {
|
||||
var err error
|
||||
nsc.mu.Lock()
|
||||
defer nsc.mu.Unlock()
|
||||
|
||||
nsc.serviceMap = buildServicesInfo()
|
||||
nsc.endpointsMap = buildEndpointsInfo()
|
||||
err := nsc.syncHairpinIptablesRules()
|
||||
err = nsc.syncHairpinIptablesRules()
|
||||
if err != nil {
|
||||
glog.Errorf("Error syncing hairpin iptable rules: %s", err.Error())
|
||||
}
|
||||
nsc.syncIpvsServices(nsc.serviceMap, nsc.endpointsMap)
|
||||
|
||||
err = nsc.syncIpvsServices(nsc.serviceMap, nsc.endpointsMap)
|
||||
if err != nil {
|
||||
glog.Errorf("Error syncing IPVS services: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
if nsc.MetricsEnabled {
|
||||
nsc.publishMetrics(nsc.serviceMap)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (nsc *NetworkServicesController) publishMetrics(serviceInfoMap serviceInfoMap) error {
|
||||
|
@ -111,7 +111,9 @@ func (kr *KubeRouter) stopApiWatchers() {
|
||||
func (kr *KubeRouter) Run() error {
|
||||
var err error
|
||||
var wg sync.WaitGroup
|
||||
|
||||
healthChan := make(chan *controllers.ControllerHeartbeat, 10)
|
||||
defer close(healthChan)
|
||||
|
||||
stopCh := make(chan struct{})
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user