mirror of
https://github.com/cloudnativelabs/kube-router.git
synced 2025-10-07 16:01:08 +02:00
more work on healthchecks
This commit is contained in:
parent
77cb340cfd
commit
e53aef280c
@ -1,6 +1,7 @@
|
|||||||
package controllers
|
package controllers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
@ -42,15 +43,23 @@ func sendHeartBeat(channel chan<- *ControllerHeartbeat, controller string) {
|
|||||||
func (hc *HealthController) Handler(w http.ResponseWriter, req *http.Request) {
|
func (hc *HealthController) Handler(w http.ResponseWriter, req *http.Request) {
|
||||||
if hc.Status.Healthy {
|
if hc.Status.Healthy {
|
||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
w.Write([]byte("These aren't the droids you're looking for\n"))
|
w.Write([]byte("OK\n"))
|
||||||
} else {
|
} else {
|
||||||
w.WriteHeader(http.StatusInternalServerError)
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
w.Write([]byte("These are the droids you're looking for\n"))
|
statusText := fmt.Sprintf("Service controller last alive %s\n ago"+
|
||||||
|
"Routing controller last alive: %s\n ago"+
|
||||||
|
"Policy controller last alive: %s\n ago"+
|
||||||
|
"Metrics controller last alive: %s\n ago",
|
||||||
|
time.Since(hc.Status.NetworkServicesControllerAlive),
|
||||||
|
time.Since(hc.Status.NetworkRoutingControllerAlive),
|
||||||
|
time.Since(hc.Status.NetworkPolicyControllerAlive),
|
||||||
|
time.Since(hc.Status.MetricsControllerAlive))
|
||||||
|
w.Write([]byte(statusText))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
|
func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
|
||||||
glog.Infof("Received heartbeat from %s", beat.Component)
|
glog.V(3).Infof("Received heartbeat from %s", beat.Component)
|
||||||
switch component := beat.Component; component {
|
switch component := beat.Component; component {
|
||||||
case "NSC":
|
case "NSC":
|
||||||
hc.Status.NetworkServicesControllerAlive = time.Now()
|
hc.Status.NetworkServicesControllerAlive = time.Now()
|
||||||
@ -64,22 +73,35 @@ func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (hc *HealthController) CheckHealth() bool {
|
func (hc *HealthController) CheckHealth() bool {
|
||||||
glog.V(4).Info("Checking components")
|
|
||||||
health := true
|
health := true
|
||||||
|
if hc.Config.RunFirewall {
|
||||||
if time.Since(hc.Status.NetworkPolicyControllerAlive) > hc.Config.IPTablesSyncPeriod+3*time.Second {
|
if time.Since(hc.Status.NetworkPolicyControllerAlive) > hc.Config.IPTablesSyncPeriod+3*time.Second {
|
||||||
glog.Error("Network Policy Controller heartbeat timeout")
|
glog.Error("Network Policy Controller heartbeat missed")
|
||||||
health = false
|
health = false
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if hc.Config.RunRouter {
|
||||||
if time.Since(hc.Status.NetworkRoutingControllerAlive) > hc.Config.RoutesSyncPeriod+3*time.Second {
|
if time.Since(hc.Status.NetworkRoutingControllerAlive) > hc.Config.RoutesSyncPeriod+3*time.Second {
|
||||||
glog.Error("Network Routing Controller heartbeat timeout")
|
glog.Error("Network Routing Controller heartbeat missed")
|
||||||
health = false
|
health = false
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if hc.Config.RunServiceProxy {
|
||||||
if time.Since(hc.Status.NetworkServicesControllerAlive) > hc.Config.IpvsSyncPeriod+3*time.Second {
|
if time.Since(hc.Status.NetworkServicesControllerAlive) > hc.Config.IpvsSyncPeriod+3*time.Second {
|
||||||
glog.Error("NetworkService Controller heartbeat timeout")
|
glog.Error("NetworkService Controller heartbeat missed")
|
||||||
health = false
|
health = false
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if hc.Config.MetricsEnabled {
|
||||||
|
if time.Since(hc.Status.MetricsControllerAlive) > 3*time.Second {
|
||||||
|
glog.Error("Metrics Controller heartbeat missed")
|
||||||
|
health = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return health
|
return health
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +112,6 @@ func (hc *HealthController) Run(healthChan <-chan *ControllerHeartbeat, stopCh <
|
|||||||
|
|
||||||
srv := &http.Server{Addr: ":" + strconv.Itoa(int(hc.HealthPort)), Handler: http.DefaultServeMux}
|
srv := &http.Server{Addr: ":" + strconv.Itoa(int(hc.HealthPort)), Handler: http.DefaultServeMux}
|
||||||
|
|
||||||
// add prometheus handler on metrics path
|
|
||||||
http.HandleFunc("/healthz", hc.Handler)
|
http.HandleFunc("/healthz", hc.Handler)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
@ -121,13 +121,13 @@ func (npc *NetworkPolicyController) Run(healthChan chan<- *ControllerHeartbeat,
|
|||||||
err := npc.Sync()
|
err := npc.Sync()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Error during periodic sync: " + err.Error())
|
glog.Errorf("Error during periodic sync: " + err.Error())
|
||||||
|
} else {
|
||||||
|
sendHeartBeat(healthChan, "NPC")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
sendHeartBeat(healthChan, "NPC")
|
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-stopCh:
|
case <-stopCh:
|
||||||
glog.Infof("Shutting down network policies controller")
|
glog.Infof("Shutting down network policies controller")
|
||||||
|
@ -130,13 +130,16 @@ func (nsc *NetworkServicesController) Run(healthChan chan<- *ControllerHeartbeat
|
|||||||
|
|
||||||
if watchers.PodWatcher.HasSynced() && watchers.NetworkPolicyWatcher.HasSynced() {
|
if watchers.PodWatcher.HasSynced() && watchers.NetworkPolicyWatcher.HasSynced() {
|
||||||
glog.V(1).Info("Performing periodic sync of ipvs services")
|
glog.V(1).Info("Performing periodic sync of ipvs services")
|
||||||
nsc.sync()
|
err := nsc.sync()
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Error during periodic ipvs sync: " + err.Error())
|
||||||
|
} else {
|
||||||
|
sendHeartBeat(healthChan, "NSC")
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
sendHeartBeat(healthChan, "NSC")
|
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-stopCh:
|
case <-stopCh:
|
||||||
glog.Info("Shutting down network services controller")
|
glog.Info("Shutting down network services controller")
|
||||||
@ -146,20 +149,28 @@ func (nsc *NetworkServicesController) Run(healthChan chan<- *ControllerHeartbeat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (nsc *NetworkServicesController) sync() {
|
func (nsc *NetworkServicesController) sync() error {
|
||||||
|
var err error
|
||||||
nsc.mu.Lock()
|
nsc.mu.Lock()
|
||||||
defer nsc.mu.Unlock()
|
defer nsc.mu.Unlock()
|
||||||
|
|
||||||
nsc.serviceMap = buildServicesInfo()
|
nsc.serviceMap = buildServicesInfo()
|
||||||
nsc.endpointsMap = buildEndpointsInfo()
|
nsc.endpointsMap = buildEndpointsInfo()
|
||||||
err := nsc.syncHairpinIptablesRules()
|
err = nsc.syncHairpinIptablesRules()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("Error syncing hairpin iptable rules: %s", err.Error())
|
glog.Errorf("Error syncing hairpin iptable rules: %s", err.Error())
|
||||||
}
|
}
|
||||||
nsc.syncIpvsServices(nsc.serviceMap, nsc.endpointsMap)
|
|
||||||
|
err = nsc.syncIpvsServices(nsc.serviceMap, nsc.endpointsMap)
|
||||||
|
if err != nil {
|
||||||
|
glog.Errorf("Error syncing IPVS services: %s", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if nsc.MetricsEnabled {
|
if nsc.MetricsEnabled {
|
||||||
nsc.publishMetrics(nsc.serviceMap)
|
nsc.publishMetrics(nsc.serviceMap)
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (nsc *NetworkServicesController) publishMetrics(serviceInfoMap serviceInfoMap) error {
|
func (nsc *NetworkServicesController) publishMetrics(serviceInfoMap serviceInfoMap) error {
|
||||||
|
@ -111,7 +111,9 @@ func (kr *KubeRouter) stopApiWatchers() {
|
|||||||
func (kr *KubeRouter) Run() error {
|
func (kr *KubeRouter) Run() error {
|
||||||
var err error
|
var err error
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
healthChan := make(chan *controllers.ControllerHeartbeat, 10)
|
healthChan := make(chan *controllers.ControllerHeartbeat, 10)
|
||||||
|
defer close(healthChan)
|
||||||
|
|
||||||
stopCh := make(chan struct{})
|
stopCh := make(chan struct{})
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user