Merge pull request #796 from paulfantom/cpu_steal

CPU steal time shouldn't be counted into CPU usage
This commit is contained in:
Sergiusz Urbaniak 2020-11-25 14:25:40 +01:00 committed by GitHub
commit b9c95f02f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 6 deletions

View File

@ -5,7 +5,7 @@
name: 'kube-prometheus-node-recording.rules', name: 'kube-prometheus-node-recording.rules',
rules: [ rules: [
{ {
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)', expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)',
record: 'instance:node_cpu:rate:sum', record: 'instance:node_cpu:rate:sum',
}, },
{ {
@ -17,11 +17,11 @@
record: 'instance:node_network_transmit_bytes:rate:sum', record: 'instance:node_network_transmit_bytes:rate:sum',
}, },
{ {
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)', expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)',
record: 'instance:node_cpu:ratio', record: 'instance:node_cpu:ratio',
}, },
{ {
expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))', expr: 'sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))',
record: 'cluster:node_cpu:sum_rate5m', record: 'cluster:node_cpu:sum_rate5m',
}, },
{ {

View File

@ -762,15 +762,15 @@ spec:
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- name: kube-prometheus-node-recording.rules - name: kube-prometheus-node-recording.rules
rules: rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
record: instance:node_cpu:rate:sum record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
record: cluster:node_cpu:sum_rate5m record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
record: cluster:node_cpu:ratio record: cluster:node_cpu:ratio