jsonnet: readjust prometheus-adapter intervals

Previously, prometheus-adapter configuration wasn't taking into account
the scrape interval of kubelet, node-exporter and windows-exporter
leading to getting non fresh results, and even negative results from the
CPU queries when the irate() function was extrapolating data.
To fix that, we want to set the interval used in the irate() function in
the CPU queries to 4x scrape interval in order to extrapolate data
between the last two scrapes. This will improve the freshness of the cpu
usage exposed and prevent incorrect extrapolations.

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
This commit is contained in:
Damien Grisonnet 2021-06-30 16:53:02 +02:00
parent 4d8104817d
commit 8812e45501
4 changed files with 30 additions and 4 deletions

View File

@ -22,13 +22,24 @@ local defaults = {
for labelName in std.objectFields(defaults.commonLabels)
if !std.setMember(labelName, ['app.kubernetes.io/version'])
},
// Default range intervals are equal to 4 times the default scrape interval.
// This is done in order to follow Prometheus rule of thumb with irate().
rangeIntervals: {
kubelet: '4m',
nodeExporter: '4m',
windowsExporter: '4m',
},
prometheusURL: error 'must provide prometheusURL',
config: {
resourceRules: {
cpu: {
containerQuery: 'sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[5m])) by (<<.GroupBy>>)',
nodeQuery: 'sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)',
containerQuery: |||
sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[%(kubelet)s])) by (<<.GroupBy>>)
||| % $.rangeIntervals,
nodeQuery: |||
sum(1 - irate(node_cpu_seconds_total{mode="idle"}[%(nodeExporter)s]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[%(windowsExporter)s])) by (<<.GroupBy>>)
||| % $.rangeIntervals,
resources: {
overrides: {
node: { resource: 'node' },

View File

@ -0,0 +1,7 @@
{
// rangeInterval takes a scrape interval and convert its to a range interval
// following Prometheus rule of thumb for rate() and irate().
rangeInterval(i='1m'):
local interval = std.parseInt(std.substr(i, 0, std.length(i) - 1));
interval * 4 + i[std.length(i) - 1],
}

View File

@ -11,6 +11,8 @@ local prometheus = import './components/prometheus.libsonnet';
local platformPatch = import './platforms/platforms.libsonnet';
local utils = import './lib/utils.libsonnet';
{
// using `values` as this is similar to helm
values:: {
@ -97,6 +99,10 @@ local platformPatch = import './platforms/platforms.libsonnet';
version: $.values.common.versions.prometheusAdapter,
image: $.values.common.images.prometheusAdapter,
prometheusURL: 'http://prometheus-' + $.values.prometheus.name + '.' + $.values.common.namespace + '.svc.cluster.local:9090/',
rangeIntervals+: {
kubelet: utils.rangeInterval($.kubernetesControlPlane.serviceMonitorKubelet.spec.endpoints[0].interval),
nodeExporter: utils.rangeInterval($.nodeExporter.serviceMonitor.spec.endpoints[0].interval),
},
},
prometheusOperator: {
namespace: $.values.common.namespace,

View File

@ -4,8 +4,10 @@ data:
"resourceRules":
"cpu":
"containerLabel": "container"
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode=\"idle\", job=\"windows-exporter\",<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)"
"containerQuery": |
sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s])) by (<<.GroupBy>>)
"nodeQuery": |
sum(1 - irate(node_cpu_seconds_total{mode="idle"}[60s]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) or sum (1- irate(windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m])) by (<<.GroupBy>>)
"resources":
"overrides":
"namespace":