mirror of
https://github.com/cloudnativelabs/kube-router.git
synced 2025-12-16 09:11:38 +01:00
Roffe/metrics polish (#595)
* update metrics docs & dashboard * renamed `namespace` label to `svc_namespace` for service metrics as it would be overwritten by most Prometheus setups * Made histograms for all the controller sync times for better visualization * added `controller_routes_sync_time`, `controller_bgp_advertisements_sent` & `controller_policy_chains_sync_time` metrics
This commit is contained in:
parent
0cdaa4362f
commit
e5d599b14c
Binary file not shown.
|
Before Width: | Height: | Size: 555 KiB After Width: | Height: | Size: 967 KiB |
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,8 @@ The default values unless other specified is
|
||||
By enabling [Kubernetes SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<kubernetes_sd_config>) in the Prometheus configuration & adding the required annotations, Prometheus can automatically discover & scrape kube-router metrics
|
||||
|
||||
## Version notes
|
||||
kube-router v0.2.4 received a metrics overhaul where some metrics were changed into histograms, and additional metrics were also added. Please make sure you are using the latest dashboard version with versions >= v0.2.4
|
||||
|
||||
kube-router 0.1.0-rc2 and upwards supports the runtime configuration for controlling where to expose the metrics. If you are using an older version, the metrics path & port are locked to `/metrics` & `8080`
|
||||
|
||||
## Supported annotations
|
||||
@ -56,14 +58,20 @@ The following metrics is exposed by kube-router prefixed by `kube_router_`
|
||||
* controller_bgp_peers
|
||||
Number of BGP peers of the instance
|
||||
* controller_bgp_advertisements_received
|
||||
Number of total BGP advertisements received since kube-router start
|
||||
Total number of BGP advertisements received since kube-router started
|
||||
* controller_bgp_advertisements_sent
|
||||
Total number of BGP advertisements sent since kube-router started
|
||||
* controller_bgp_internal_peers_sync_time
|
||||
Time it took for the BGP internal peer sync loop to complete
|
||||
* controller_routes_sync_time
|
||||
Time it took for controller to sync routes
|
||||
|
||||
### run-firewall=true
|
||||
|
||||
* controller_iptables_sync_time
|
||||
Time it took for the iptables sync loop to complete
|
||||
* controller_policy_chains_sync_time
|
||||
Time it took for controller to sync policy chains
|
||||
|
||||
### run-service-proxy = true
|
||||
|
||||
@ -95,7 +103,7 @@ The following metrics is exposed by kube-router prefixed by `kube_router_`
|
||||
Outgoing bytes per second
|
||||
|
||||
To get a grouped list of CPS for each service, a Prometheus query could look like this, e.g.:
|
||||
`sum(kube_router_service_cps) by (namespace, service_name)`
|
||||
`sum(kube_router_service_cps) by (svc_namespace, service_name)`
|
||||
|
||||
## Grafana Dashboard
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -26,7 +27,6 @@ import (
|
||||
"k8s.io/client-go/kubernetes"
|
||||
listers "k8s.io/client-go/listers/core/v1"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -215,7 +215,7 @@ func (npc *NetworkPolicyController) Sync() error {
|
||||
defer func() {
|
||||
endTime := time.Since(start)
|
||||
if npc.MetricsEnabled {
|
||||
metrics.ControllerIptablesSyncTime.WithLabelValues().Set(float64(endTime.Seconds()))
|
||||
metrics.ControllerIptablesSyncTime.Observe(endTime.Seconds())
|
||||
}
|
||||
glog.V(1).Infof("sync iptables took %v", endTime)
|
||||
}()
|
||||
@ -258,7 +258,12 @@ func (npc *NetworkPolicyController) Sync() error {
|
||||
// policyspec is evaluated to set of matching pods, which are grouped in to a
|
||||
// ipset used for source ip addr matching.
|
||||
func (npc *NetworkPolicyController) syncNetworkPolicyChains(version string) (map[string]bool, map[string]bool, error) {
|
||||
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
endTime := time.Since(start)
|
||||
metrics.ControllerPolicyChainsSyncTime.Observe(endTime.Seconds())
|
||||
glog.V(2).Infof("Syncing network policy chains took %v", endTime)
|
||||
}()
|
||||
activePolicyChains := make(map[string]bool)
|
||||
activePolicyIpSets := make(map[string]bool)
|
||||
|
||||
@ -1536,6 +1541,7 @@ func NewNetworkPolicyController(clientset kubernetes.Interface,
|
||||
if config.MetricsEnabled {
|
||||
//Register the metrics for this controller
|
||||
prometheus.MustRegister(metrics.ControllerIptablesSyncTime)
|
||||
prometheus.MustRegister(metrics.ControllerPolicyChainsSyncTime)
|
||||
npc.MetricsEnabled = true
|
||||
}
|
||||
|
||||
|
||||
@ -373,7 +373,9 @@ func (nsc *NetworkServicesController) publishMetrics(serviceInfoMap serviceInfoM
|
||||
defer func() {
|
||||
endTime := time.Since(start)
|
||||
glog.V(2).Infof("Publishing IPVS metrics took %v", endTime)
|
||||
metrics.ControllerIpvsMetricsExportTime.WithLabelValues().Set(float64(endTime.Seconds()))
|
||||
if nsc.MetricsEnabled {
|
||||
metrics.ControllerIpvsMetricsExportTime.Observe(float64(endTime.Seconds()))
|
||||
}
|
||||
}()
|
||||
|
||||
ipvsSvcs, err := nsc.ln.ipvsGetServices()
|
||||
@ -429,7 +431,7 @@ func (nsc *NetworkServicesController) publishMetrics(serviceInfoMap serviceInfoM
|
||||
metrics.ServicePpsIn.WithLabelValues(svc.namespace, svc.name, svcVip, svc.protocol, strconv.Itoa(svc.port)).Set(float64(ipvsSvc.Stats.PPSIn))
|
||||
metrics.ServicePpsOut.WithLabelValues(svc.namespace, svc.name, svcVip, svc.protocol, strconv.Itoa(svc.port)).Set(float64(ipvsSvc.Stats.PPSOut))
|
||||
metrics.ServiceTotalConn.WithLabelValues(svc.namespace, svc.name, svcVip, svc.protocol, strconv.Itoa(svc.port)).Set(float64(ipvsSvc.Stats.Connections))
|
||||
metrics.ControllerIpvsServices.WithLabelValues().Set(float64(len(ipvsSvcs)))
|
||||
metrics.ControllerIpvsServices.Set(float64(len(ipvsSvcs)))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -528,7 +530,7 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf
|
||||
defer func() {
|
||||
endTime := time.Since(start)
|
||||
if nsc.MetricsEnabled {
|
||||
metrics.ControllerIpvsServicesSyncTime.WithLabelValues().Set(float64(endTime.Seconds()))
|
||||
metrics.ControllerIpvsServicesSyncTime.Observe(endTime.Seconds())
|
||||
}
|
||||
glog.V(1).Infof("sync ipvs services took %v", endTime)
|
||||
}()
|
||||
|
||||
@ -30,7 +30,9 @@ func (nrc *NetworkRoutingController) syncInternalPeers() {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
endTime := time.Since(start)
|
||||
metrics.ControllerBGPInternalPeersSyncTime.WithLabelValues().Set(float64(endTime.Seconds()))
|
||||
if nrc.MetricsEnabled {
|
||||
metrics.ControllerBGPInternalPeersSyncTime.Observe(endTime.Seconds())
|
||||
}
|
||||
glog.V(2).Infof("Syncing BGP peers for the node took %v", endTime)
|
||||
}()
|
||||
|
||||
@ -40,8 +42,9 @@ func (nrc *NetworkRoutingController) syncInternalPeers() {
|
||||
glog.Errorf("Failed to list nodes from API server due to: %s. Can not perform BGP peer sync", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
metrics.ControllerBPGpeers.WithLabelValues().Set(float64(len(nodes.Items)))
|
||||
if nrc.MetricsEnabled {
|
||||
metrics.ControllerBPGpeers.Set(float64(len(nodes.Items)))
|
||||
}
|
||||
// establish peer and add Pod CIDRs with current set of nodes
|
||||
currentNodes := make([]string, 0)
|
||||
for _, node := range nodes.Items {
|
||||
|
||||
@ -325,7 +325,7 @@ func (nrc *NetworkRoutingController) watchBgpUpdates() {
|
||||
case *gobgp.WatchEventBestPath:
|
||||
glog.V(3).Info("Processing bgp route advertisement from peer")
|
||||
if nrc.MetricsEnabled {
|
||||
metrics.ControllerBGPadvertisementsReceived.WithLabelValues().Add(float64(1))
|
||||
metrics.ControllerBGPadvertisementsReceived.Inc()
|
||||
}
|
||||
for _, path := range msg.PathList {
|
||||
if path.IsLocal() {
|
||||
@ -342,6 +342,9 @@ func (nrc *NetworkRoutingController) watchBgpUpdates() {
|
||||
}
|
||||
|
||||
func (nrc *NetworkRoutingController) advertisePodRoute() error {
|
||||
if nrc.MetricsEnabled {
|
||||
metrics.ControllerBGPadvertisementsSent.Inc()
|
||||
}
|
||||
cidr, err := utils.GetPodCidrFromNodeSpec(nrc.clientset, nrc.hostnameOverride)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -486,6 +489,12 @@ func (nrc *NetworkRoutingController) Cleanup() {
|
||||
}
|
||||
|
||||
func (nrc *NetworkRoutingController) syncNodeIPSets() error {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
if nrc.MetricsEnabled {
|
||||
metrics.ControllerRoutesSyncTime.Observe(time.Since(start).Seconds())
|
||||
}
|
||||
}()
|
||||
// Get the current list of the nodes from API server
|
||||
nodes, err := nrc.clientset.CoreV1().Nodes().List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
@ -786,6 +795,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface,
|
||||
prometheus.MustRegister(metrics.ControllerBGPadvertisementsReceived)
|
||||
prometheus.MustRegister(metrics.ControllerBGPInternalPeersSyncTime)
|
||||
prometheus.MustRegister(metrics.ControllerBPGpeers)
|
||||
prometheus.MustRegister(metrics.ControllerRoutesSyncTime)
|
||||
nrc.MetricsEnabled = true
|
||||
}
|
||||
|
||||
|
||||
@ -21,100 +21,130 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
// ServiceTotalConn Total incoming connections made
|
||||
ServiceTotalConn = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_total_connections",
|
||||
Help: "Total incoming connections made",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServicePacketsIn Total incoming packets
|
||||
ServicePacketsIn = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_packets_in",
|
||||
Help: "Total incoming packets",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServicePacketsOut Total outgoing packets
|
||||
ServicePacketsOut = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_packets_out",
|
||||
Help: "Total outgoing packets",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServiceBytesIn Total incoming bytes
|
||||
ServiceBytesIn = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_bytes_in",
|
||||
Help: "Total incoming bytes",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServiceBytesOut Total outgoing bytes
|
||||
ServiceBytesOut = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_bytes_out",
|
||||
Help: "Total outgoing bytes",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServicePpsIn Incoming packets per second
|
||||
ServicePpsIn = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_pps_in",
|
||||
Help: "Incoming packets per second",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServicePpsOut Outgoing packets per second
|
||||
ServicePpsOut = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_pps_out",
|
||||
Help: "Outgoing packets per second",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServiceCPS Service connections per second
|
||||
ServiceCPS = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_cps",
|
||||
Help: "Service connections per second",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServiceBpsIn Incoming bytes per second
|
||||
ServiceBpsIn = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_bps_in",
|
||||
Help: "Incoming bytes per second",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ServiceBpsOut Outgoing bytes per second
|
||||
ServiceBpsOut = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "service_bps_out",
|
||||
Help: "Outgoing bytes per second",
|
||||
}, []string{"namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
ControllerIpvsServices = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
}, []string{"svc_namespace", "service_name", "service_vip", "protocol", "port"})
|
||||
// ControllerIpvsServices Number of ipvs services in the instance
|
||||
ControllerIpvsServices = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_ipvs_services",
|
||||
Help: "Number of ipvs services in the instance",
|
||||
}, []string{})
|
||||
ControllerIptablesSyncTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
})
|
||||
// ControllerIptablesSyncTime Time it took for controller to sync iptables
|
||||
ControllerIptablesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_iptables_sync_time",
|
||||
Help: "Time it took for controller to sync iptables",
|
||||
}, []string{})
|
||||
ControllerPublishMetricsTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_publish_metrics_time",
|
||||
Help: "Time it took to publish metrics",
|
||||
}, []string{})
|
||||
ControllerIpvsServicesSyncTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
})
|
||||
// ControllerIpvsServicesSyncTime Time it took for controller to sync ipvs services
|
||||
ControllerIpvsServicesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_ipvs_services_sync_time",
|
||||
Help: "Time it took for controller to sync ipvs services",
|
||||
}, []string{})
|
||||
ControllerBPGpeers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
})
|
||||
// ControllerRoutesSyncTime Time it took for controller to sync routes
|
||||
ControllerRoutesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_routes_sync_time",
|
||||
Help: "Time it took for controller to sync routes",
|
||||
})
|
||||
// ControllerBPGpeers BGP peers in the runtime configuration
|
||||
ControllerBPGpeers = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_bgp_peers",
|
||||
Help: "BGP peers in the runtime configuration",
|
||||
}, []string{})
|
||||
ControllerBGPInternalPeersSyncTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
})
|
||||
// ControllerBGPInternalPeersSyncTime Time it took to sync internal bgp peers
|
||||
ControllerBGPInternalPeersSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_bgp_internal_peers_sync_time",
|
||||
Help: "Time it took to sync internal bgp peers",
|
||||
}, []string{})
|
||||
ControllerBGPadvertisementsReceived = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
})
|
||||
// ControllerBGPadvertisementsReceived BGP advertisements received
|
||||
ControllerBGPadvertisementsReceived = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_bgp_advertisements_received",
|
||||
Help: "Time it took to sync internal bgp peers",
|
||||
}, []string{})
|
||||
ControllerIpvsMetricsExportTime = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Help: "BGP advertisements received",
|
||||
})
|
||||
// ControllerBGPadvertisementsSent BGP advertisements sent
|
||||
ControllerBGPadvertisementsSent = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_bgp_advertisements_sent",
|
||||
Help: "BGP advertisements sent",
|
||||
})
|
||||
// ControllerIpvsMetricsExportTime Time it took to export metrics
|
||||
ControllerIpvsMetricsExportTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_ipvs_metrics_export_time",
|
||||
Help: "Time it took to export metrics",
|
||||
}, []string{})
|
||||
})
|
||||
// ControllerPolicyChainsSyncTime Time it took for controller to sync policy chains
|
||||
ControllerPolicyChainsSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Name: "controller_policy_chains_sync_time",
|
||||
Help: "Time it took for controller to sync policy chains",
|
||||
})
|
||||
)
|
||||
|
||||
// MetricsController Holds settings for the metrics controller
|
||||
type MetricsController struct {
|
||||
// Controller Holds settings for the metrics controller
|
||||
type Controller struct {
|
||||
MetricsPath string
|
||||
MetricsPort uint16
|
||||
mu sync.Mutex
|
||||
@ -122,7 +152,7 @@ type MetricsController struct {
|
||||
}
|
||||
|
||||
// Run prometheus metrics controller
|
||||
func (mc *MetricsController) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) error {
|
||||
func (mc *Controller) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) error {
|
||||
t := time.NewTicker(3 * time.Second)
|
||||
defer wg.Done()
|
||||
glog.Info("Starting metrics controller")
|
||||
@ -157,8 +187,8 @@ func (mc *MetricsController) Run(healthChan chan<- *healthcheck.ControllerHeartb
|
||||
}
|
||||
|
||||
// NewMetricsController returns a new metrics Controller object
|
||||
func NewMetricsController(clientset kubernetes.Interface, config *options.KubeRouterConfig) (*MetricsController, error) {
|
||||
mc := MetricsController{}
|
||||
func NewMetricsController(clientset kubernetes.Interface, config *options.KubeRouterConfig) (*Controller, error) {
|
||||
mc := Controller{}
|
||||
mc.MetricsPath = config.MetricsPath
|
||||
mc.MetricsPort = config.MetricsPort
|
||||
return &mc, nil
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user