go-lib for promtool

Signed-off-by: DrAuYueng <ouyang1204@gmail.com>
This commit is contained in:
DrAuYueng 2025-07-28 15:37:31 +08:00
parent 2c04f2d7b1
commit 479aa98772
51 changed files with 41475 additions and 0 deletions

554
promtoollib/config.go Normal file
View File

@ -0,0 +1,554 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promtoollib
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
promconfig "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/file"
"github.com/prometheus/prometheus/discovery/kubernetes"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/notifier"
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
)
const (
	// Process exit codes returned by the check functions.
	successExitCode = 0
	failureExitCode = 1
	// Exit code 3 is used for "one or more lint issues detected".
	lintErrExitCode = 3

	// Recognized lint option names accepted by the New*LintConfig parsers.
	lintOptionAll                   = "all"
	lintOptionDuplicateRules        = "duplicate-rules"
	LintOptionTooLongScrapeInterval = "too-long-scrape-interval"
	lintOptionNone                  = "none"

	// Prometheus health and readiness endpoint paths.
	checkHealth    = "/-/healthy"
	checkReadiness = "/-/ready"
)

// errLint marks findings that are lint issues rather than hard errors;
// callers distinguish the two with errors.Is(err, errLint).
var errLint = errors.New("lint error")
// RulesLintConfig holds the linting options applied to rule files.
type RulesLintConfig struct {
	all                 bool // "all": enable every lint check
	duplicateRules      bool // "duplicate-rules": flag duplicated metric/label combinations
	fatal               bool // treat lint findings as fatal (non-zero exit code)
	ignoreUnknownFields bool // tolerate unknown fields when parsing rule files
}
// NewRulesLintConfig parses a comma-separated list of lint option names into
// a RulesLintConfig. Unknown option names produce a warning on stdout and are
// otherwise ignored; "none" is accepted but enables nothing.
func NewRulesLintConfig(stringVal string, fatal, ignoreUnknownFields bool) RulesLintConfig {
	cfg := RulesLintConfig{
		fatal:               fatal,
		ignoreUnknownFields: ignoreUnknownFields,
	}
	for _, opt := range strings.Split(stringVal, ",") {
		switch opt {
		case lintOptionAll:
			cfg.all = true
		case lintOptionDuplicateRules:
			cfg.duplicateRules = true
		case lintOptionNone:
			// Recognized, but deliberately enables no checks.
		default:
			fmt.Printf("WARNING: unknown lint option: %q\n", opt)
		}
	}
	return cfg
}
// ConfigLintConfig extends RulesLintConfig with lint settings that only apply
// to Prometheus configuration files.
type ConfigLintConfig struct {
	RulesLintConfig
	// lookbackDelta, when non-zero, enables the too-long-scrape-interval
	// check: any scrape interval >= this duration is flagged.
	lookbackDelta model.Duration
}
// NewConfigLintConfig parses a comma-separated lint option string into a
// ConfigLintConfig. "none" disables all linting; "all" and
// "too-long-scrape-interval" enable the scrape-interval check using
// lookbackDelta; any remaining options are forwarded to the rules linter.
func NewConfigLintConfig(optionsStr string, fatal, ignoreUnknownFields bool, lookbackDelta model.Duration) ConfigLintConfig {
	c := ConfigLintConfig{
		RulesLintConfig: RulesLintConfig{
			fatal: fatal,
			// BUGFIX: previously ignoreUnknownFields was only carried over
			// when a rules lint option was also present, so rule files were
			// parsed strictly even when the caller asked to ignore unknown
			// fields (e.g. with only "too-long-scrape-interval" enabled).
			ignoreUnknownFields: ignoreUnknownFields,
		},
	}
	lintNone := false
	var rulesOptions []string
	for _, option := range strings.Split(optionsStr, ",") {
		switch option {
		case lintOptionAll, LintOptionTooLongScrapeInterval:
			c.lookbackDelta = lookbackDelta
			if option == lintOptionAll {
				rulesOptions = append(rulesOptions, lintOptionAll)
			}
		case lintOptionNone:
			lintNone = true
		default:
			// Unknown options are passed through; NewRulesLintConfig warns
			// about anything it does not recognize.
			rulesOptions = append(rulesOptions, option)
		}
	}
	if lintNone {
		// "none" wins over every other option.
		c.lookbackDelta = 0
		rulesOptions = nil
	}
	if len(rulesOptions) > 0 {
		c.RulesLintConfig = NewRulesLintConfig(strings.Join(rulesOptions, ","), fatal, ignoreUnknownFields)
	}
	return c
}
// compareRuleType is the sortable key (metric name plus full label set) used
// to detect duplicated rules.
type compareRuleType struct {
	metric string
	label  labels.Labels
}

// compareRuleTypes implements sort.Interface, ordering entries with compare.
type compareRuleTypes []compareRuleType

func (c compareRuleTypes) Len() int           { return len(c) }
func (c compareRuleTypes) Swap(i, j int)      { c[i], c[j] = c[j], c[i] }
func (c compareRuleTypes) Less(i, j int) bool { return compare(c[i], c[j]) < 0 }
// compare orders two rule keys first by metric name, then — for equal
// metric names — by their label sets.
func compare(a, b compareRuleType) int {
	byMetric := strings.Compare(a.metric, b.metric)
	if byMetric != 0 {
		return byMetric
	}
	return labels.Compare(a.label, b.label)
}
// lintDuplicateRules reports whether the duplicate-rules check is enabled,
// either explicitly or via the "all" option.
func (ls RulesLintConfig) lintDuplicateRules() bool {
	return ls.all || ls.duplicateRules
}
// OutputWriter abstracts where check output goes, so results can be written
// to the terminal or captured in memory.
type OutputWriter interface {
	OutWriter() io.Writer // destination for normal (success/progress) output
	ErrWriter() io.Writer // destination for failure output
}

// StdWriter routes check output to the process's stdout and stderr.
type StdWriter struct{}

// OutWriter returns os.Stdout.
func (w *StdWriter) OutWriter() io.Writer {
	return os.Stdout
}

// ErrWriter returns os.Stderr.
func (w *StdWriter) ErrWriter() io.Writer {
	return os.Stderr
}
// ByteBufferWriter captures both normal and error output in one in-memory
// buffer, interleaved in write order.
type ByteBufferWriter struct {
	outBuffer *bytes.Buffer
}

// String returns everything written so far.
func (b *ByteBufferWriter) String() string {
	return b.outBuffer.String()
}

// OutWriter returns the shared capture buffer.
func (b *ByteBufferWriter) OutWriter() io.Writer {
	return b.outBuffer
}

// ErrWriter returns the same buffer as OutWriter.
func (b *ByteBufferWriter) ErrWriter() io.Writer {
	return b.outBuffer
}
// CheckConfigWithOutput behaves like CheckConfig but captures all output in
// memory and returns it alongside the exit code.
func CheckConfigWithOutput(agentMode, checkSyntaxOnly bool, lintSettings ConfigLintConfig, files ...string) (int, string) {
	var buf bytes.Buffer
	capture := &ByteBufferWriter{outBuffer: &buf}
	exitCode := doCheckConfig(capture, agentMode, checkSyntaxOnly, lintSettings, files...)
	return exitCode, buf.String()
}
// CheckConfig validates the given Prometheus config files, writing results to
// stdout/stderr, and returns a process exit code (see doCheckConfig).
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings ConfigLintConfig, files ...string) int {
	return doCheckConfig(&StdWriter{}, agentMode, checkSyntaxOnly, lintSettings, files...)
}
// doCheckConfig validates each config file in turn, writing progress and
// failures to writer, and maps the aggregate outcome to an exit code:
// failureExitCode when any hard error occurred, lintErrExitCode when only
// lint issues were found and lintSettings.fatal is set, successExitCode
// otherwise.
func doCheckConfig(writer OutputWriter, agentMode, checkSyntaxOnly bool, lintSettings ConfigLintConfig, files ...string) int {
	failed := false    // any problem at all (lint finding or hard error)
	hasErrors := false // at least one non-lint (hard) error
	for _, f := range files {
		ruleFiles, scrapeConfigs, err := checkConfig(writer, agentMode, f, checkSyntaxOnly)
		if err != nil {
			fmt.Fprintln(writer.ErrWriter(), " FAILED:", err)
			hasErrors = true
			failed = true
		} else {
			if len(ruleFiles) > 0 {
				fmt.Fprintf(writer.OutWriter(), " SUCCESS: %d rule files found\n", len(ruleFiles))
			}
			fmt.Fprintf(writer.OutWriter(), " SUCCESS: %s is valid prometheus config file syntax\n", f)
		}
		fmt.Fprintln(writer.OutWriter())
		// Lint checks need the resolved configs, so they are skipped in
		// syntax-only mode. Note ruleFiles/scrapeConfigs may be nil when
		// checkConfig failed; both helpers tolerate empty input.
		if !checkSyntaxOnly {
			scrapeConfigsFailed := lintScrapeConfigs(writer, scrapeConfigs, lintSettings)
			failed = failed || scrapeConfigsFailed
			rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.RulesLintConfig)
			failed = failed || rulesFailed
			hasErrors = hasErrors || rulesHaveErrors
		}
	}
	// Hard errors take precedence over the lint exit code.
	if failed && hasErrors {
		return failureExitCode
	}
	if failed && lintSettings.fatal {
		return lintErrExitCode
	}
	return successExitCode
}
// checkConfig loads and validates a single Prometheus configuration file.
// It returns the expanded list of rule files referenced by the config, the
// scrape configs, and an error for the first hard validation problem found.
// In syntax-only mode, file-existence checks and service-discovery file
// validation are skipped.
func checkConfig(writer OutputWriter, agentMode bool, filename string, checkSyntaxOnly bool) ([]string, []*config.ScrapeConfig, error) {
	fmt.Fprintln(writer.OutWriter(), "Checking", filename)
	cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger())
	if err != nil {
		return nil, nil, err
	}

	var ruleFiles []string
	if !checkSyntaxOnly {
		for _, rf := range cfg.RuleFiles {
			rfs, err := filepath.Glob(rf)
			if err != nil {
				return nil, nil, err
			}
			// If an explicit file was given, error if it is not accessible.
			if !strings.Contains(rf, "*") {
				if len(rfs) == 0 {
					return nil, nil, fmt.Errorf("%q does not point to an existing file", rf)
				}
				if err := checkFileExists(rfs[0]); err != nil {
					return nil, nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
				}
			}
			ruleFiles = append(ruleFiles, rfs...)
		}
	}

	var scfgs []*config.ScrapeConfig
	if checkSyntaxOnly {
		scfgs = cfg.ScrapeConfigs
	} else {
		var err error
		scfgs, err = cfg.GetScrapeConfigs()
		if err != nil {
			return nil, nil, fmt.Errorf("error loading scrape configs: %w", err)
		}
	}

	for _, scfg := range scfgs {
		if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil {
			if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil {
				return nil, nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
			}
		}
		if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
			return nil, nil, err
		}
		for _, c := range scfg.ServiceDiscoveryConfigs {
			switch c := c.(type) {
			case *kubernetes.SDConfig:
				if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
					return nil, nil, err
				}
			case *file.SDConfig:
				if checkSyntaxOnly {
					break
				}
				for _, file := range c.Files {
					files, err := filepath.Glob(file)
					if err != nil {
						return nil, nil, err
					}
					if len(files) != 0 {
						for _, f := range files {
							var targetGroups []*targetgroup.Group
							targetGroups, err = checkSDFile(f)
							if err != nil {
								return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
							}
							if err := checkTargetGroupsForScrapeConfig(targetGroups, scfg); err != nil {
								return nil, nil, err
							}
						}
						continue
					}
					// BUGFIX: route the warning through the OutputWriter so
					// CheckConfigWithOutput captures it; previously it went
					// straight to stdout via fmt.Printf.
					fmt.Fprintf(writer.OutWriter(), " WARNING: file %q for file_sd in scrape job %q does not exist\n", file, scfg.JobName)
				}
			case discovery.StaticConfig:
				if err := checkTargetGroupsForScrapeConfig(c, scfg); err != nil {
					return nil, nil, err
				}
			}
		}
	}

	alertConfig := cfg.AlertingConfig
	for _, amcfg := range alertConfig.AlertmanagerConfigs {
		for _, c := range amcfg.ServiceDiscoveryConfigs {
			switch c := c.(type) {
			case *file.SDConfig:
				if checkSyntaxOnly {
					break
				}
				for _, file := range c.Files {
					files, err := filepath.Glob(file)
					if err != nil {
						return nil, nil, err
					}
					if len(files) != 0 {
						for _, f := range files {
							var targetGroups []*targetgroup.Group
							targetGroups, err = checkSDFile(f)
							if err != nil {
								return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
							}
							if err := checkTargetGroupsForAlertmanager(targetGroups, amcfg); err != nil {
								return nil, nil, err
							}
						}
						continue
					}
					// BUGFIX: as above, write through the OutputWriter
					// instead of fmt.Printf so captured output is complete.
					fmt.Fprintf(writer.OutWriter(), " WARNING: file %q for file_sd in alertmanager config does not exist\n", file)
				}
			case discovery.StaticConfig:
				if err := checkTargetGroupsForAlertmanager(c, amcfg); err != nil {
					return nil, nil, err
				}
			}
		}
	}
	return ruleFiles, scfgs, nil
}
// checkTLSConfig verifies that a client certificate and key are configured as
// a pair and, unless only syntax is being checked, that both files exist.
func checkTLSConfig(tlsConfig promconfig.TLSConfig, checkSyntaxOnly bool) error {
	cert, key := tlsConfig.CertFile, tlsConfig.KeyFile
	switch {
	case len(cert) > 0 && len(key) == 0:
		return fmt.Errorf("client cert file %q specified without client key file", cert)
	case len(key) > 0 && len(cert) == 0:
		return fmt.Errorf("client key file %q specified without client cert file", key)
	}
	if checkSyntaxOnly {
		return nil
	}
	if err := checkFileExists(cert); err != nil {
		return fmt.Errorf("error checking client cert file %q: %w", cert, err)
	}
	if err := checkFileExists(key); err != nil {
		return fmt.Errorf("error checking client key file %q: %w", key, err)
	}
	return nil
}
// lintScrapeConfigs checks every scrape config's interval against the
// configured lookback delta and reports each offending job. It returns true
// if at least one job failed the check.
//
// BUGFIX: previously the function returned on the first offending job, so a
// config with several problematic jobs only ever reported one of them.
func lintScrapeConfigs(writer OutputWriter, scrapeConfigs []*config.ScrapeConfig, lintSettings ConfigLintConfig) bool {
	failed := false
	for _, scfg := range scrapeConfigs {
		// A scrape interval at or above the lookback delta means samples go
		// stale before the next scrape arrives.
		if lintSettings.lookbackDelta > 0 && scfg.ScrapeInterval >= lintSettings.lookbackDelta {
			fmt.Fprintf(writer.ErrWriter(), " FAILED: too long scrape interval found, data point will be marked as stale - job: %s, interval: %s\n", scfg.JobName, scfg.ScrapeInterval)
			failed = true
		}
	}
	return failed
}
// checkRules validates each given rule file. It returns two flags:
// failed — at least one file had a problem (lint or hard error) — and
// hasErrors — at least one problem was a hard error, i.e. not wrapped in
// errLint.
func checkRules(files []string, ls RulesLintConfig) (bool, bool) {
	failed := false
	hasErrors := false
	for _, f := range files {
		fmt.Println("Checking", f)
		rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields)
		if errs != nil {
			failed = true
			fmt.Fprintln(os.Stderr, " FAILED:")
			// BUGFIX: track hard errors per file. Previously the hasErrors
			// accumulator from earlier files caused the `continue` below to
			// skip group-level checks for a later file whose own problems
			// were lint-only.
			fileHasErrors := false
			for _, e := range errs {
				fmt.Fprintln(os.Stderr, e.Error())
				if !errors.Is(e, errLint) {
					fileHasErrors = true
				}
			}
			if fileHasErrors {
				hasErrors = true
				continue
			}
		}
		if n, errs := checkRuleGroups(rgs, ls); errs != nil {
			fmt.Fprintln(os.Stderr, " FAILED:")
			for _, e := range errs {
				fmt.Fprintln(os.Stderr, e.Error())
				if !errors.Is(e, errLint) {
					hasErrors = true
				}
			}
			failed = true
		} else {
			fmt.Printf(" SUCCESS: %d rules found\n", n)
		}
		fmt.Println()
	}
	return failed, hasErrors
}
// checkRuleGroups counts the rules in rgs and, when duplicate-rule linting is
// enabled, reports duplicated metric/label combinations as one lint error.
// It returns the rule count on success, or (0, errs) on a lint failure.
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings RulesLintConfig) (int, []error) {
	total := 0
	for _, g := range rgs.Groups {
		total += len(g.Rules)
	}
	if lintSettings.lintDuplicateRules() {
		if dups := checkDuplicates(rgs.Groups); len(dups) != 0 {
			var msg strings.Builder
			fmt.Fprintf(&msg, "%d duplicate rule(s) found.\n", len(dups))
			for _, d := range dups {
				fmt.Fprintf(&msg, "Metric: %s\nLabel(s):\n", d.metric)
				d.label.Range(func(l labels.Label) {
					fmt.Fprintf(&msg, "\t%s: %s\n", l.Name, l.Value)
				})
			}
			msg.WriteString("Might cause inconsistency while recording expressions")
			return 0, []error{fmt.Errorf("%w %s", errLint, msg.String())}
		}
	}
	return total, nil
}
// checkTargetGroupsForAlertmanager verifies that every target group can be
// resolved into Alertmanager endpoints under amcfg, returning the first
// resolution error encountered.
func checkTargetGroupsForAlertmanager(targetGroups []*targetgroup.Group, amcfg *config.AlertmanagerConfig) error {
	for _, tg := range targetGroups {
		if _, _, err := notifier.AlertmanagerFromGroup(tg, amcfg); err != nil {
			return err
		}
	}
	return nil
}
// checkTargetGroupsForScrapeConfig verifies that every target group can be
// turned into scrape targets under scfg, returning the first failure found.
func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *config.ScrapeConfig) error {
	builder := labels.NewBuilder(labels.EmptyLabels())
	var targets []*scrape.Target
	for _, group := range targetGroups {
		var errs []error
		targets, errs = scrape.TargetsFromGroup(group, scfg, targets, builder)
		if len(errs) > 0 {
			// Surface only the first failure per group.
			return errs[0]
		}
	}
	return nil
}
// checkDuplicates returns every rule that shares its metric name and full
// label set (group labels merged with rule labels) with an earlier rule.
// Each duplicated combination is reported at most once.
func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType {
	var entries compareRuleTypes
	for _, g := range groups {
		for _, r := range g.Rules {
			entries = append(entries, compareRuleType{
				metric: ruleMetric(r),
				label:  rules.FromMaps(g.Labels, r.Labels),
			})
		}
	}
	var duplicates []compareRuleType
	if len(entries) < 2 {
		return duplicates
	}
	// Sorting brings identical metric/label combinations next to each other.
	sort.Sort(entries)
	prev := entries[0]
	for _, cur := range entries[1:] {
		if compare(prev, cur) == 0 {
			// Don't add a duplicated rule multiple times.
			if len(duplicates) == 0 || compare(prev, duplicates[len(duplicates)-1]) != 0 {
				duplicates = append(duplicates, cur)
			}
		}
		prev = cur
	}
	return duplicates
}
// checkSDFile parses a file-based service-discovery file (.json, .yml or
// .yaml) into target groups. It rejects unknown extensions and nil entries
// (e.g. a stray "-" item in YAML).
func checkSDFile(filename string) ([]*targetgroup.Group, error) {
	// os.ReadFile replaces the manual Open/ReadAll/Close sequence and cannot
	// leak the file descriptor.
	content, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}
	var targetGroups []*targetgroup.Group
	switch ext := filepath.Ext(filename); strings.ToLower(ext) {
	case ".json":
		if err := json.Unmarshal(content, &targetGroups); err != nil {
			return nil, err
		}
	case ".yml", ".yaml":
		// Strict decoding rejects unknown fields in SD files.
		if err := yaml.UnmarshalStrict(content, &targetGroups); err != nil {
			return nil, err
		}
	default:
		return nil, fmt.Errorf("invalid file extension: %q", ext)
	}
	for i, tg := range targetGroups {
		if tg == nil {
			return nil, fmt.Errorf("nil target group item found (index %d)", i)
		}
	}
	return targetGroups, nil
}
// ruleMetric returns the name a rule produces: the alert name for alerting
// rules, otherwise the recorded metric name.
func ruleMetric(rule rulefmt.Rule) string {
	if rule.Alert == "" {
		return rule.Record
	}
	return rule.Alert
}
// checkFileExists returns an error if fn names a path that cannot be
// stat'ed. An empty path means the option was not set and is accepted.
func checkFileExists(fn string) error {
	if fn == "" {
		// Nothing set, nothing to error on.
		return nil
	}
	if _, err := os.Stat(fn); err != nil {
		return err
	}
	return nil
}

112
promtoollib/config_test.go Normal file
View File

@ -0,0 +1,112 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promtoollib
import (
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
// TestCheckScrapeConfigs exercises the too-long-scrape-interval lint via
// CheckConfig against a fixture whose scrape interval is 10m, varying the
// lookback delta around that value.
func TestCheckScrapeConfigs(t *testing.T) {
	for _, tc := range []struct {
		name          string
		lookbackDelta model.Duration
		expectError   bool
	}{
		{
			name:          "scrape interval less than lookback delta",
			lookbackDelta: model.Duration(11 * time.Minute),
			expectError:   false,
		},
		{
			name:          "scrape interval greater than lookback delta",
			lookbackDelta: model.Duration(5 * time.Minute),
			expectError:   true,
		},
		{
			// The lint triggers on >=, so an equal interval also fails.
			name:          "scrape interval same as lookback delta",
			lookbackDelta: model.Duration(10 * time.Minute),
			expectError:   true,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Non-fatal linting.
			code := CheckConfig(false, false, NewConfigLintConfig(LintOptionTooLongScrapeInterval, false, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, "Non-fatal linting should return success")
			// Fatal linting.
			code = CheckConfig(false, false, NewConfigLintConfig(LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			if tc.expectError {
				require.Equal(t, lintErrExitCode, code, "Fatal linting should return error")
			} else {
				require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems")
			}
			// Check syntax only, no linting.
			code = CheckConfig(false, true, NewConfigLintConfig(LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only")
			// Lint option "none" should disable linting.
			code = CheckConfig(false, false, NewConfigLintConfig(lintOptionNone+","+LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`)
		})
	}
}
// TestCheckScrapeConfigsWithOutput mirrors TestCheckScrapeConfigs but uses
// CheckConfigWithOutput and additionally asserts that every run produces
// captured output.
func TestCheckScrapeConfigsWithOutput(t *testing.T) {
	for _, tc := range []struct {
		name          string
		lookbackDelta model.Duration
		expectError   bool
	}{
		{
			name:          "scrape interval less than lookback delta",
			lookbackDelta: model.Duration(11 * time.Minute),
			expectError:   false,
		},
		{
			name:          "scrape interval greater than lookback delta",
			lookbackDelta: model.Duration(5 * time.Minute),
			expectError:   true,
		},
		{
			// The lint triggers on >=, so an equal interval also fails.
			name:          "scrape interval same as lookback delta",
			lookbackDelta: model.Duration(10 * time.Minute),
			expectError:   true,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Non-fatal linting.
			code, output := CheckConfigWithOutput(false, false, NewConfigLintConfig(LintOptionTooLongScrapeInterval, false, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, "Non-fatal linting should return success")
			require.NotEmpty(t, output, "Non-fatal linting should produce output")
			// Fatal linting.
			code, output = CheckConfigWithOutput(false, false, NewConfigLintConfig(LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			if tc.expectError {
				require.Equal(t, lintErrExitCode, code, "Fatal linting should return error")
			} else {
				require.Equal(t, successExitCode, code, "Fatal linting should return success when there are no problems")
			}
			// BUGFIX: the following assertion messages previously said
			// "Non-fatal linting" — copy-pasted from the first case — which
			// made failure reports misleading.
			require.NotEmpty(t, output, "Fatal linting should produce output")
			// Check syntax only, no linting.
			code, output = CheckConfigWithOutput(false, true, NewConfigLintConfig(LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, "Fatal linting should return success when checking syntax only")
			require.NotEmpty(t, output, "Syntax-only checking should produce output")
			// Lint option "none" should disable linting.
			code, output = CheckConfigWithOutput(false, false, NewConfigLintConfig(lintOptionNone+","+LintOptionTooLongScrapeInterval, true, false, tc.lookbackDelta), "./testdata/prometheus-config.lint.too_long_scrape_interval.yml")
			require.Equal(t, successExitCode, code, `Fatal linting should return success when lint option "none" is specified`)
			require.NotEmpty(t, output, `Lint option "none" should still produce output`)
		})
	}
}

View File

@ -0,0 +1,7 @@
rule_files:
- at-modifier.yml
tests:
- input_series:
- series: "requests{}"
values: 1

7
promtoollib/testdata/at-modifier.yml vendored Normal file
View File

@ -0,0 +1,7 @@
# This is the rules file for at-modifier-test.yml.
groups:
- name: at-modifier
rules:
- record: x
expr: "requests @ 1000"

View File

@ -0,0 +1,4 @@
scrape_configs:
- job_name: test
authorization:
credentials_file: "/random/file/which/does/not/exist.yml"

View File

@ -0,0 +1,4 @@
scrape_configs:
- job_name: test
authorization:
credentials_file: "."

View File

@ -0,0 +1,4 @@
tests:
- input_series:
- series: 'up{job="prometheus", instance="localhost:9090"'
values: "0+0x1440"

12
promtoollib/testdata/bad-promql.yml vendored Normal file
View File

@ -0,0 +1,12 @@
tests:
- input_series:
- series: 'join_1{a="1",b="2"}'
values: 1
- series: 'join_2{a="1",b="3"}'
values: 2
- series: 'join_2{a="1",b="4"}'
values: 3
promql_expr_test:
# This PromQL generates an error.
- expr: "join_1 + on(a) join_2"

View File

@ -0,0 +1,14 @@
rule_files:
- bad-rules-error.yml
tests:
- input_series:
- series: 'join_1{a="1",b="2"}'
values: 1
- series: 'join_2{a="1",b="3"}'
values: 2
- series: 'join_2{a="1",b="4"}'
values: 3
# Just the existence of the data, that can't be joined, makes the recording
# rules error.

View File

@ -0,0 +1,7 @@
# This is the rules file for bad-rules-error-test.yml.
groups:
- name: bad-example
rules:
- record: joined
expr: join_1 + on(a) join_2

View File

@ -0,0 +1,6 @@
rule_files:
- bad-rules-syntax.yml
tests:
# Need a test to ensure the recording rules actually run.
- {}

View File

@ -0,0 +1,7 @@
# This is the rules file for bad-rules-syntax-test.yml.
groups:
- name: bad-syntax
rules:
- record: x
expr: 'test +'

View File

View File

@ -0,0 +1,2 @@
- targats:
- localhost:9100

View File

@ -0,0 +1,3 @@
rule_files:
- non-existent-file.yml
- /etc/non/existent/file.yml

View File

@ -0,0 +1,12 @@
scrape_configs:
- job_name: prometheus
file_sd_configs:
- files:
- nonexistent_file.yml
alerting:
alertmanagers:
- scheme: http
api_version: v2
file_sd_configs:
- files:
- nonexistent_file.yml

View File

@ -0,0 +1,5 @@
scrape_configs:
- job_name: "some job"
tls_config:
cert_file: nonexistent_cert_file.yml
key_file: nonexistent_key_file.yml

View File

@ -0,0 +1,15 @@
my_histogram_bucket{instance="localhost:8000",job="example2",le="+Inf"} 1.0267820369e+10 1700215884.373
my_histogram_bucket{instance="localhost:8000",job="example2",le="+Inf"} 1.026872507e+10 1700215889.373
my_histogram_bucket{instance="localhost:8000",job="example2",le="0.01"} 0 1700215884.373
my_histogram_bucket{instance="localhost:8000",job="example2",le="0.01"} 0 1700215889.373
my_histogram_count{instance="localhost:8000",job="example2"} 1.0267820369e+10 1700215884.373
my_histogram_count{instance="localhost:8000",job="example2"} 1.026872507e+10 1700215889.373
my_summary_count{instance="localhost:8000",job="example5"} 9.518161497e+09 1700211684.981
my_summary_count{instance="localhost:8000",job="example5"} 9.519048034e+09 1700211689.984
my_summary_sum{instance="localhost:8000",job="example5"} 5.2349889185e+10 1700211684.981
my_summary_sum{instance="localhost:8000",job="example5"} 5.2354761848e+10 1700211689.984
up{instance="localhost:8000",job="example2"} 1 1700226034.330
up{instance="localhost:8000",job="example2"} 1 1700226094.329
up{instance="localhost:8000",job="example3"} 1 1700210681.366
up{instance="localhost:8000",job="example3"} 1 1700210686.366
# EOF

View File

@ -0,0 +1,11 @@
my_counter{baz="abc",foo="bar"} 1 0.000
my_counter{baz="abc",foo="bar"} 2 60.000
my_counter{baz="abc",foo="bar"} 3 120.000
my_counter{baz="abc",foo="bar"} 4 180.000
my_counter{baz="abc",foo="bar"} 5 240.000
my_gauge{abc="baz",bar="foo"} 9 0.000
my_gauge{abc="baz",bar="foo"} 8 60.000
my_gauge{abc="baz",bar="foo"} 0 120.000
my_gauge{abc="baz",bar="foo"} 4 180.000
my_gauge{abc="baz",bar="foo"} 7 240.000
# EOF

15
promtoollib/testdata/dump-test-1.prom vendored Normal file
View File

@ -0,0 +1,15 @@
{__name__="heavy_metric", foo="bar"} 5 0
{__name__="heavy_metric", foo="bar"} 4 60000
{__name__="heavy_metric", foo="bar"} 3 120000
{__name__="heavy_metric", foo="bar"} 2 180000
{__name__="heavy_metric", foo="bar"} 1 240000
{__name__="heavy_metric", foo="foo"} 5 0
{__name__="heavy_metric", foo="foo"} 4 60000
{__name__="heavy_metric", foo="foo"} 3 120000
{__name__="heavy_metric", foo="foo"} 2 180000
{__name__="heavy_metric", foo="foo"} 1 240000
{__name__="metric", baz="abc", foo="bar"} 1 0
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000
{__name__="metric", baz="abc", foo="bar"} 4 180000
{__name__="metric", baz="abc", foo="bar"} 5 240000

10
promtoollib/testdata/dump-test-2.prom vendored Normal file
View File

@ -0,0 +1,10 @@
{__name__="heavy_metric", foo="foo"} 5 0
{__name__="heavy_metric", foo="foo"} 4 60000
{__name__="heavy_metric", foo="foo"} 3 120000
{__name__="heavy_metric", foo="foo"} 2 180000
{__name__="heavy_metric", foo="foo"} 1 240000
{__name__="metric", baz="abc", foo="bar"} 1 0
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000
{__name__="metric", baz="abc", foo="bar"} 4 180000
{__name__="metric", baz="abc", foo="bar"} 5 240000

2
promtoollib/testdata/dump-test-3.prom vendored Normal file
View File

@ -0,0 +1,2 @@
{__name__="metric", baz="abc", foo="bar"} 2 60000
{__name__="metric", baz="abc", foo="bar"} 3 120000

38
promtoollib/testdata/failing.yml vendored Normal file
View File

@ -0,0 +1,38 @@
rule_files:
- rules.yml
tests:
# Simple failing test, depends on no rules.
- interval: 1m
name: "Failing test"
input_series:
- series: test
values: '0'
promql_expr_test:
- expr: test
eval_time: 0m
exp_samples:
- value: 1
labels: test
alert_rule_test:
- eval_time: 0m
alertname: Test
exp_alerts:
- exp_labels: {}
# Alerts firing, but no alert expected by the test.
- interval: 1m
name: Failing alert test
input_series:
- series: 'up{job="test"}'
values: 0x10
alert_rule_test:
- eval_time: 5m
alertname: InstanceDown
exp_alerts: []
- eval_time: 5m
alertname: AlwaysFiring
exp_alerts: []

6
promtoollib/testdata/features.yml vendored Normal file
View File

@ -0,0 +1,6 @@
groups:
- name: features
rules:
- record: x
# We don't expect anything from this, just want to check the function parses.
expr: sort_by_label(up, "instance")

View File

@ -0,0 +1,8 @@
[
{
"labels": {
"job": "node"
},
"targets": ["localhost:9100"]
}
]

View File

@ -0,0 +1,4 @@
- labels:
job: node
- targets:
- localhost:9100

4
promtoollib/testdata/good-sd-file.yml vendored Normal file
View File

@ -0,0 +1,4 @@
- labels:
job: node
- targets:
- localhost:9100

34
promtoollib/testdata/long-period.yml vendored Normal file
View File

@ -0,0 +1,34 @@
# Evaluate once every 100d to avoid this taking too long.
evaluation_interval: 100d
rule_files:
- rules.yml
tests:
- interval: 100d
input_series:
- series: test
# Max time in time.Duration is 106751d from 1970 (2^63/10^9), i.e. 2262.
# We use the nearest 100 days to that to ensure the unit tests can fully
# cover the expected range.
values: '0+1x1067'
promql_expr_test:
- expr: timestamp(test)
eval_time: 0m
exp_samples:
- value: 0
- expr: test
eval_time: 100d # one evaluation_interval.
exp_samples:
- labels: test
value: 1
- expr: timestamp(test)
eval_time: 106700d
exp_samples:
- value: 9218880000 # 106700d -> seconds.
- expr: fixed_data
eval_time: 106700d
exp_samples:
- labels: fixed_data
value: 1

35
promtoollib/testdata/metrics-test.prom vendored Normal file
View File

@ -0,0 +1,35 @@
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 2.391e-05
go_gc_duration_seconds{quantile="0.25"} 9.4402e-05
go_gc_duration_seconds{quantile="0.5"} 0.000118953
go_gc_duration_seconds{quantile="0.75"} 0.000145884
go_gc_duration_seconds{quantile="1"} 0.005201208
go_gc_duration_seconds_sum 0.036134048
go_gc_duration_seconds_count 232
# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction
# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 662
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 1460
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 2266
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 3958
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4861
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 5721
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 10493
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 12464
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 13254
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 13699
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 13806
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 13852
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 13867
prometheus_tsdb_compaction_chunk_size_bytes_sum 3.886707e+06
prometheus_tsdb_compaction_chunk_size_bytes_count 13867
# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name.
# TYPE net_conntrack_dialer_conn_attempted_total counter
net_conntrack_dialer_conn_attempted_total{dialer_name="blackbox"} 5210
net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0
net_conntrack_dialer_conn_attempted_total{dialer_name="node"} 21
net_conntrack_dialer_conn_attempted_total{dialer_name="prometheus"} 21
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.17"} 1

View File

@ -0,0 +1,7 @@
rule_files:
- negative-offset.yml
tests:
- input_series:
- series: "requests{}"
values: 1

View File

@ -0,0 +1,7 @@
# This is the rules file for negative-offset-test.yml.
groups:
- name: negative-offset
rules:
- record: x
expr: "requests offset -5m"

View File

@ -0,0 +1,15 @@
tests:
- input_series:
- series: test
values: 0 1
promql_expr_test:
- expr: test
eval_time: 59s
exp_samples:
- value: 0
labels: test
- expr: test
eval_time: 1m
exp_samples:
- value: 1
labels: test

View File

@ -0,0 +1 @@
not-prometheus:

View File

View File

@ -0,0 +1,3 @@
scrape_configs:
- job_name: too_long_scrape_interval_test
scrape_interval: 10m

View File

@ -0,0 +1,2 @@
rule_files:
- prometheus-rules.lint.yml

View File

@ -0,0 +1,17 @@
groups:
- name: example
rules:
- alert: HighRequestLatency
expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
for: 10m
labels:
severity: page
annotations:
summary: High request latency
- alert: HighRequestLatency
expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
for: 10m
labels:
severity: page
annotations:
summary: High request latency

28
promtoollib/testdata/rules-bad.yml vendored Normal file
View File

@ -0,0 +1,28 @@
# This is the rules file.
groups:
- name: alerts
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $label.foo }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: rules
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])

28
promtoollib/testdata/rules.yml vendored Normal file
View File

@ -0,0 +1,28 @@
# This is the rules file.
groups:
- name: alerts
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: rules
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])

View File

@ -0,0 +1,24 @@
# This is a rules file with duplicate expressions
groups:
- name: base
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])
# Duplicating
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
- name: duplicate
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))

View File

@ -0,0 +1,33 @@
# This is the rules file. It has an extra "ownership"
# field in the second group. promtool should ignore this field
# and not return an error with --ignore-unknown-fields.
groups:
- name: alerts
namespace: "foobar"
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: rules
ownership:
service: "test"
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])

40011
promtoollib/testdata/rules_large.yml vendored Normal file

File diff suppressed because it is too large Load Diff

30
promtoollib/testdata/rules_run.yml vendored Normal file
View File

@ -0,0 +1,30 @@
rule_files:
- rules.yml
evaluation_interval: 1m
# Minimal test cases for checking that a run can focus on a single test
# (one passing test and one deliberately failing test).
tests:
- name: correct test
input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test
- name: wrong test
input_series:
- series: test
values: 0
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test

View File

@ -0,0 +1,21 @@
# Minimal test case to see that --ignore-unknown-fields
# is working as expected. It should not return an error
# when any extra fields are present in the rules file.
rule_files:
- rules_extrafields.yml
evaluation_interval: 1m
tests:
- name: extra ownership field test
input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test

View File

@ -0,0 +1,43 @@
# Minimal test case to see that fuzzy compare is working as expected.
# It should allow slight floating point differences through. Larger
# floating point differences should still fail.
evaluation_interval: 1m
fuzzy_compare: true
tests:
- name: correct fuzzy match
input_series:
- series: test_low
values: 2.9999999999999996
- series: test_high
values: 3.0000000000000004
promql_expr_test:
- expr: test_low
eval_time: 0
exp_samples:
- labels: test_low
value: 3
- expr: test_high
eval_time: 0
exp_samples:
- labels: test_high
value: 3
- name: wrong fuzzy match
input_series:
- series: test_low
values: 2.9999999999999987
- series: test_high
values: 3.0000000000000013
promql_expr_test:
- expr: test_low
eval_time: 0
exp_samples:
- labels: test_low
value: 3
- expr: test_high
eval_time: 0
exp_samples:
- labels: test_high
value: 3

View File

@ -0,0 +1,24 @@
# Minimal test case to see that fuzzy compare can be turned off,
# and slight floating point differences fail matching.
evaluation_interval: 1m
fuzzy_compare: false
tests:
- name: correct fuzzy match
input_series:
- series: test_low
values: 2.9999999999999996
- series: test_high
values: 3.0000000000000004
promql_expr_test:
- expr: test_low
eval_time: 0
exp_samples:
- labels: test_low
value: 3
- expr: test_high
eval_time: 0
exp_samples:
- labels: test_high
value: 3

203
promtoollib/testdata/unittest.yml vendored Normal file
View File

@ -0,0 +1,203 @@
rule_files:
- rules.yml
evaluation_interval: 1m
tests:
# Basic tests for promql_expr_test, not dependent on rules.
- interval: 1m
input_series:
- series: test_full
values: "0 0"
- series: test_repeat
values: "1x2"
- series: test_increase
values: "1+1x2"
- series: test_histogram
values: "{{schema:1 sum:-0.3 count:32.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5}}"
- series: test_histogram_repeat
values: "{{sum:3 count:2 buckets:[2]}}x2"
- series: test_histogram_increase
values: "{{sum:3 count:2 buckets:[2]}}+{{sum:1.3 count:1 buckets:[1]}}x2"
- series: test_stale
values: "0 stale"
- series: test_missing
values: "0 _ _ _ _ _ _ 0"
promql_expr_test:
# Ensure the sample is evaluated at the time we expect it to be.
- expr: timestamp(test_full)
eval_time: 0m
exp_samples:
- value: 0
- expr: timestamp(test_full)
eval_time: 1m
exp_samples:
- value: 60
- expr: timestamp(test_full)
eval_time: 2m
exp_samples:
- value: 60
# Repeat & increase
- expr: test_repeat
eval_time: 2m
exp_samples:
- value: 1
labels: "test_repeat"
- expr: test_increase
eval_time: 2m
exp_samples:
- value: 3
labels: "test_increase"
# Histograms
- expr: test_histogram
eval_time: 1m
exp_samples:
- labels: "test_histogram"
histogram: "{{schema:1 sum:-0.3 count:32.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5}}"
- expr: test_histogram_repeat
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale
eval_time: 59s
exp_samples:
- value: 0
labels: "test_stale"
- expr: test_stale
eval_time: 1m
exp_samples: []
# Ensure lookback delta is respected, when a value is missing.
- expr: timestamp(test_missing)
eval_time: 4m59s
exp_samples:
- value: 0
- expr: timestamp(test_missing)
eval_time: 5m
exp_samples: []
# Minimal test case to check edge case of a single sample.
- input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test
# Test recording rules run even if input_series isn't provided.
- promql_expr_test:
- expr: count_over_time(fixed_data[1h])
eval_time: 1h
exp_samples:
- value: 60
- expr: timestamp(fixed_data)
eval_time: 1h
exp_samples:
- value: 3600
# Tests for alerting rules.
- interval: 1m
input_series:
- series: 'up{job="prometheus", instance="localhost:9090"}'
values: "0+0x1440"
promql_expr_test:
- expr: count(ALERTS) by (alertname, alertstate)
eval_time: 4m
exp_samples:
- labels: '{alertname="AlwaysFiring",alertstate="firing"}'
value: 1
- labels: '{alertname="InstanceDown",alertstate="pending"}'
value: 1
alert_rule_test:
- eval_time: 1d
alertname: AlwaysFiring
exp_alerts:
- {}
- eval_time: 1d
alertname: InstanceDown
exp_alerts:
- exp_labels:
severity: page
instance: localhost:9090
job: prometheus
exp_annotations:
summary: "Instance localhost:9090 down"
description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
- eval_time: 0
alertname: AlwaysFiring
exp_alerts:
- {}
- eval_time: 0
alertname: InstanceDown
exp_alerts: []
# Tests for interval vs evaluation_interval.
- interval: 1s
input_series:
- series: 'test{job="test", instance="x:0"}'
# 2 minutes + 1 second of input data, recording rules should only run
# once a minute.
values: "0+1x120"
promql_expr_test:
- expr: job:test:count_over_time1m
eval_time: 0m
exp_samples:
- value: 1
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 10s
exp_samples:
- value: 0
labels: '{job="test"}'
- expr: job:test:count_over_time1m
eval_time: 1m
exp_samples:
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 1m10s
exp_samples:
- value: 60
labels: '{job="test"}'
- expr: job:test:count_over_time1m
eval_time: 2m
exp_samples:
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 2m59s999ms
exp_samples:
- value: 120
labels: '{job="test"}'

View File

@ -0,0 +1,8 @@
alerting:
alertmanagers:
- relabel_configs:
- source_labels: [__address__]
target_label: __param_target
static_configs:
- targets:
- http://bad

View File

@ -0,0 +1,10 @@
alerting:
alertmanagers:
- relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: good
static_configs:
- targets:
- http://bad

View File

@ -0,0 +1,8 @@
scrape_configs:
- job_name: prometheus
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
static_configs:
- targets:
- http://bad

View File

@ -0,0 +1,10 @@
scrape_configs:
- job_name: prometheus
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: good
static_configs:
- targets:
- http://good