kube-router/app/controllers/health_controller.go
2018-02-04 21:25:49 +01:00

151 lines
4.0 KiB
Go

package controllers
import (
"fmt"
"net/http"
"strconv"
"sync"
"time"
"github.com/cloudnativelabs/kube-router/app/options"
"github.com/golang/glog"
"golang.org/x/net/context"
)
// ControllerHeartbeat is the message passed over the heartbeat channel that
// HealthController.Run consumes.
type ControllerHeartbeat struct {
// Component identifies the sender. HandleHeartbeat recognises the names
// "NSC", "NRC", "NPC" and "MC".
Component string
// Lastheartbeat is the time the heartbeat was created; sendHeartBeat sets
// it to time.Now().
Lastheartbeat time.Time
}
// HealthController reports the health of the controller loops as an HTTP
// endpoint.
type HealthController struct {
// HealthPort is the TCP port on which Run starts the HTTP server that
// exposes /healthz.
HealthPort uint16
// Status holds the latest health verdict and the per-controller heartbeat
// times; Handler reads it to build the HTTP response.
Status HealthStats
// Config is read by CheckHealth to decide which controllers are enabled
// and which sync period applies to each of them.
Config *options.KubeRouterConfig
}
// HealthStats is the state tracked by HealthController: the overall verdict
// plus the time of the most recent heartbeat recorded for each controller.
// The time fields keep their zero value until a first heartbeat is recorded.
type HealthStats struct {
// Healthy is the result of the most recent CheckHealth call made by Run.
Healthy bool
// MetricsControllerAlive is updated by HandleHeartbeat for component "MC".
MetricsControllerAlive time.Time
// NetworkPolicyControllerAlive is updated by HandleHeartbeat for component "NPC".
NetworkPolicyControllerAlive time.Time
// NetworkRoutingControllerAlive is updated by HandleHeartbeat for component "NRC".
NetworkRoutingControllerAlive time.Time
// NetworkServicesControllerAlive is updated by HandleHeartbeat for component "NSC".
NetworkServicesControllerAlive time.Time
}
// sendHeartBeat builds a heartbeat for the named controller, stamped with the
// current time, and delivers it on channel using a plain (blocking) send.
func sendHeartBeat(channel chan<- *ControllerHeartbeat, controller string) {
	channel <- &ControllerHeartbeat{
		Lastheartbeat: time.Now(),
		Component:     controller,
	}
}
// Handler is the HTTP handler for the health endpoint.
//
// While hc.Status.Healthy is true it answers 200 with the body "OK\n".
// Otherwise it answers 500 and lists, one controller per line, how long ago
// each controller's last heartbeat was recorded. A controller that has never
// sent a heartbeat still has the zero time recorded, so its reported duration
// is very large.
//
// NOTE(review): hc.Status is read here without synchronization while Run
// writes it from its own goroutine — confirm whether a lock is needed.
func (hc *HealthController) Handler(w http.ResponseWriter, req *http.Request) {
	if hc.Status.Healthy {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte("OK\n"))
		return
	}

	w.WriteHeader(http.StatusInternalServerError)
	// Each entry ends with its own newline so the report reads as four
	// separate "<name> controller last alive: <duration> ago" lines.
	statusText := fmt.Sprintf("Service controller last alive: %s ago\n"+
		"Routing controller last alive: %s ago\n"+
		"Policy controller last alive: %s ago\n"+
		"Metrics controller last alive: %s ago\n",
		time.Since(hc.Status.NetworkServicesControllerAlive),
		time.Since(hc.Status.NetworkRoutingControllerAlive),
		time.Since(hc.Status.NetworkPolicyControllerAlive),
		time.Since(hc.Status.MetricsControllerAlive))
	w.Write([]byte(statusText))
}
// HandleHeartbeat records a heartbeat in hc.Status against the controller
// named by beat.Component: "NSC" (network services), "NRC" (network routing),
// "NPC" (network policy) or "MC" (metrics).
//
// The recorded time is the one the sender stamped on the beat; if the beat
// carries no timestamp the arrival time is used instead. A nil beat or an
// unrecognised component name is logged and otherwise ignored.
func (hc *HealthController) HandleHeartbeat(beat *ControllerHeartbeat) {
	if beat == nil {
		// A receive from a closed channel yields nil; there is nothing to record.
		glog.Warning("Ignoring nil heartbeat")
		return
	}
	glog.V(3).Infof("Received heartbeat from %s", beat.Component)

	// Use the sender's timestamp so that a beat delivered late is not
	// recorded as fresher than it really is.
	seen := beat.Lastheartbeat
	if seen.IsZero() {
		seen = time.Now()
	}

	switch beat.Component {
	case "NSC":
		hc.Status.NetworkServicesControllerAlive = seen
	case "NRC":
		hc.Status.NetworkRoutingControllerAlive = seen
	case "NPC":
		hc.Status.NetworkPolicyControllerAlive = seen
	case "MC":
		hc.Status.MetricsControllerAlive = seen
	default:
		glog.Warningf("Ignoring heartbeat from unknown component %q", beat.Component)
	}
}
// CheckHealth reports whether every enabled controller has a sufficiently
// recent heartbeat on record.
//
// A controller is only examined when its switch in hc.Config is on. Its
// heartbeat counts as missed once the time since the recorded heartbeat
// exceeds that controller's sync period plus three seconds (three seconds
// flat for the metrics controller). Every missed heartbeat is logged, and
// any one of them makes the result false.
func (hc *HealthController) CheckHealth() bool {
	// Slack allowed beyond a controller's sync period.
	const grace = 3 * time.Second

	checks := []struct {
		enabled   bool
		lastAlive time.Time
		limit     time.Duration
		missed    string
	}{
		{
			enabled:   hc.Config.RunFirewall,
			lastAlive: hc.Status.NetworkPolicyControllerAlive,
			limit:     hc.Config.IPTablesSyncPeriod + grace,
			missed:    "Network Policy Controller heartbeat missed",
		},
		{
			enabled:   hc.Config.RunRouter,
			lastAlive: hc.Status.NetworkRoutingControllerAlive,
			limit:     hc.Config.RoutesSyncPeriod + grace,
			missed:    "Network Routing Controller heartbeat missed",
		},
		{
			enabled:   hc.Config.RunServiceProxy,
			lastAlive: hc.Status.NetworkServicesControllerAlive,
			limit:     hc.Config.IpvsSyncPeriod + grace,
			missed:    "NetworkService Controller heartbeat missed",
		},
		{
			enabled:   hc.Config.MetricsEnabled,
			lastAlive: hc.Status.MetricsControllerAlive,
			limit:     grace,
			missed:    "Metrics Controller heartbeat missed",
		},
	}

	healthy := true
	for _, c := range checks {
		if c.enabled && time.Since(c.lastAlive) > c.limit {
			glog.Error(c.missed)
			healthy = false
		}
	}
	return healthy
}
// Run starts the health HTTP server and then loops until stopCh fires.
//
// It registers hc.Handler for "/healthz" on http.DefaultServeMux and serves
// that mux on hc.HealthPort from a background goroutine. Each loop iteration
// re-evaluates CheckHealth and stores the result in hc.Status.Healthy, then
// waits for a stop signal, a heartbeat from healthChan, or the one-second
// tick.
//
// wg.Done is called when Run returns. The returned error is always nil;
// server and shutdown failures are logged instead.
func (hc *HealthController) Run(healthChan <-chan *ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) error {
	defer wg.Done()

	t := time.NewTicker(1 * time.Second)
	// Release the ticker's resources once the loop exits.
	defer t.Stop()

	glog.Info("Starting health controller")

	srv := &http.Server{Addr: ":" + strconv.Itoa(int(hc.HealthPort)), Handler: http.DefaultServeMux}
	http.HandleFunc("/healthz", hc.Handler)

	go func() {
		// ListenAndServe returns http.ErrServerClosed after Shutdown; that is
		// the normal exit path and not worth an error log. Anything else is
		// logged rather than panicking.
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			glog.Errorf("Health controller error: %s", err)
		}
	}()

	for {
		hc.Status.Healthy = hc.CheckHealth()
		select {
		case <-stopCh:
			glog.Infof("Shutting down health controller")
			// Bound the graceful shutdown so a stuck connection cannot keep
			// Run from returning.
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
			err := srv.Shutdown(ctx)
			cancel()
			if err != nil {
				glog.Errorf("could not shutdown: %v", err)
			}
			return nil
		case heartbeat, ok := <-healthChan:
			if !ok {
				// A closed channel is always ready and yields nil. A nil
				// channel never becomes ready, so this disables the case
				// instead of spinning on it.
				glog.Warning("Heartbeat channel closed; no further heartbeats will be received")
				healthChan = nil
			} else {
				hc.HandleHeartbeat(heartbeat)
			}
		case <-t.C:
			glog.V(4).Info("Health controller tick")
		}
	}
}
// NewHealthController builds a HealthController that keeps a reference to
// config and listens on config.HealthPort.
//
// It returns an error, instead of panicking on the field access, when config
// is nil.
func NewHealthController(config *options.KubeRouterConfig) (*HealthController, error) {
	if config == nil {
		return nil, fmt.Errorf("health controller requires a non-nil config")
	}
	return &HealthController{
		Config:     config,
		HealthPort: config.HealthPort,
	}, nil
}