mirror of
https://github.com/prometheus/prometheus.git
synced 2025-11-29 06:31:01 +01:00
tsdb: Deprecate retention flags; add tsdb.retention runtime configuration (#17026)
* Move storage from CL to config file Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * Fix .md Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * run make cli-documentation Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * fix Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * run make cli-documentation Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * nit_fixed Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * fix Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * add test and update configuration.md Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> * fix lint Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com> --------- Signed-off-by: pipiland2612 <nguyen.t.dang.minh@gmail.com>
This commit is contained in:
parent
6bb367970e
commit
ad4b59c504
@ -447,10 +447,10 @@ func main() {
|
||||
"Size at which to split the tsdb WAL segment files. Example: 100MB").
|
||||
Hidden().PlaceHolder("<bytes>").BytesVar(&cfg.tsdb.WALSegmentSize)
|
||||
|
||||
serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
|
||||
serverOnlyFlag(a, "storage.tsdb.retention.time", "[DEPRECATED] How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms. This flag has been deprecated, use the storage.tsdb.retention.time field in the config file instead.").
|
||||
SetValue(&cfg.tsdb.RetentionDuration)
|
||||
|
||||
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
|
||||
serverOnlyFlag(a, "storage.tsdb.retention.size", "[DEPRECATED] Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B. This flag has been deprecated, use the storage.tsdb.retention.size field in the config file instead.").
|
||||
BytesVar(&cfg.tsdb.MaxBytes)
|
||||
|
||||
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
|
||||
@ -671,6 +671,14 @@ func main() {
|
||||
}
|
||||
if cfgFile.StorageConfig.TSDBConfig != nil {
|
||||
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention != nil {
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 {
|
||||
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
|
||||
}
|
||||
if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 {
|
||||
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set Go runtime parameters before we get too far into initialization.
|
||||
|
||||
@ -1051,6 +1051,15 @@ type StorageConfig struct {
|
||||
ExemplarsConfig *ExemplarsConfig `yaml:"exemplars,omitempty"`
|
||||
}
|
||||
|
||||
// TSDBRetentionConfig holds the configuration retention of data in storage storage.
|
||||
type TSDBRetentionConfig struct {
|
||||
// How long to retain samples in storage.
|
||||
Time model.Duration `yaml:"time,omitempty"`
|
||||
|
||||
// Maximum number of bytes that can be stored for blocks.
|
||||
Size units.Base2Bytes `yaml:"size,omitempty"`
|
||||
}
|
||||
|
||||
// TSDBConfig configures runtime reloadable configuration options.
|
||||
type TSDBConfig struct {
|
||||
// OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted
|
||||
@ -1063,6 +1072,8 @@ type TSDBConfig struct {
|
||||
// During unmarshall, this is converted into milliseconds and stored in OutOfOrderTimeWindow.
|
||||
// This should not be used directly and must be converted into OutOfOrderTimeWindow.
|
||||
OutOfOrderTimeWindowFlag model.Duration `yaml:"out_of_order_time_window,omitempty"`
|
||||
|
||||
Retention *TSDBRetentionConfig `yaml:"retention,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
|
||||
@ -1706,6 +1706,10 @@ var expectedConf = &Config{
|
||||
TSDBConfig: &TSDBConfig{
|
||||
OutOfOrderTimeWindow: 30 * time.Minute.Milliseconds(),
|
||||
OutOfOrderTimeWindowFlag: model.Duration(30 * time.Minute),
|
||||
Retention: &TSDBRetentionConfig{
|
||||
Time: model.Duration(24 * time.Hour),
|
||||
Size: 1 * units.GiB,
|
||||
},
|
||||
},
|
||||
},
|
||||
TracingConfig: TracingConfig{
|
||||
|
||||
3
config/testdata/conf.good.yml
vendored
3
config/testdata/conf.good.yml
vendored
@ -453,6 +453,9 @@ alerting:
|
||||
storage:
|
||||
tsdb:
|
||||
out_of_order_time_window: 30m
|
||||
retention:
|
||||
time: 1d
|
||||
size: 1GB
|
||||
|
||||
tracing:
|
||||
endpoint: "localhost:4317"
|
||||
|
||||
@ -34,8 +34,8 @@ The Prometheus monitoring server
|
||||
| <code class="text-nowrap">--web.page-title</code> | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` |
|
||||
| <code class="text-nowrap">--web.cors.origin</code> | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` |
|
||||
| <code class="text-nowrap">--storage.tsdb.path</code> | Base path for metrics storage. Use with server mode only. | `data/` |
|
||||
| <code class="text-nowrap">--storage.tsdb.retention.time</code> | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
|
||||
| <code class="text-nowrap">--storage.tsdb.retention.size</code> | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | |
|
||||
| <code class="text-nowrap">--storage.tsdb.retention.time</code> | [DEPRECATED] How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. This flag has been deprecated, use the storage.tsdb.retention.time field in the config file instead. Use with server mode only. | |
|
||||
| <code class="text-nowrap">--storage.tsdb.retention.size</code> | [DEPRECATED] Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. This flag has been deprecated, use the storage.tsdb.retention.size field in the config file instead. Use with server mode only. | |
|
||||
| <code class="text-nowrap">--storage.tsdb.no-lockfile</code> | Do not create lockfile in data directory. Use with server mode only. | `false` |
|
||||
| <code class="text-nowrap">--storage.tsdb.head-chunks-write-queue-size</code> | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` |
|
||||
| <code class="text-nowrap">--storage.agent.path</code> | Base path for metrics storage. Use with agent mode only. | `data-agent/` |
|
||||
|
||||
@ -3229,6 +3229,26 @@ with this feature.
|
||||
# the agent's WAL to accept out-of-order samples that fall within the specified time window relative
|
||||
# to the timestamp of the last appended sample for the same series.
|
||||
[ out_of_order_time_window: <duration> | default = 0s ]
|
||||
|
||||
|
||||
# Configures data retention settings for TSDB.
|
||||
#
|
||||
# Note: When retention is changed at runtime, the retention
|
||||
# settings are updated immediately, but block deletion based on the new retention policy
|
||||
# occurs during the next block reload cycle. This happens automatically within 1 minute
|
||||
# or when a compaction completes, whichever comes first.
|
||||
[ retention: <retention> ] :
|
||||
# How long to retain samples in storage. If neither this option nor the size option
|
||||
# is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms.
|
||||
# This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.time.
|
||||
[ time: <duration> | default = 15d ]
|
||||
|
||||
# Maximum number of bytes that can be stored for blocks. A unit is required,
|
||||
# supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B.
|
||||
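# If set to 0 or not set, this option does not change the size limit already in effect (for example one set by the deprecated command-line flag); size-based retention is disabled only when no limit has been set.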
|
||||
# This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.size.
|
||||
[ size: <size> | default = 0 ]
|
||||
|
||||
```
|
||||
|
||||
### `<exemplars>`
|
||||
|
||||
42
tsdb/db.go
42
tsdb/db.go
@ -264,6 +264,10 @@ type DB struct {
|
||||
autoCompactMtx sync.Mutex
|
||||
autoCompact bool
|
||||
|
||||
// retentionMtx protects access to retention configuration values that can
|
||||
// be updated at runtime through config file changes.
|
||||
retentionMtx sync.RWMutex
|
||||
|
||||
// Cancel a running compaction when a shutdown is initiated.
|
||||
compactCancel context.CancelFunc
|
||||
|
||||
@ -1153,6 +1157,20 @@ func (db *DB) ApplyConfig(conf *config.Config) error {
|
||||
oooTimeWindow := int64(0)
|
||||
if conf.StorageConfig.TSDBConfig != nil {
|
||||
oooTimeWindow = conf.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
|
||||
|
||||
// Update retention configuration if provided.
|
||||
if conf.StorageConfig.TSDBConfig.Retention != nil {
|
||||
db.retentionMtx.Lock()
|
||||
if conf.StorageConfig.TSDBConfig.Retention.Time > 0 {
|
||||
db.opts.RetentionDuration = int64(conf.StorageConfig.TSDBConfig.Retention.Time)
|
||||
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
|
||||
}
|
||||
if conf.StorageConfig.TSDBConfig.Retention.Size > 0 {
|
||||
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
|
||||
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
|
||||
}
|
||||
db.retentionMtx.Unlock()
|
||||
}
|
||||
}
|
||||
if oooTimeWindow < 0 {
|
||||
oooTimeWindow = 0
|
||||
@ -1187,6 +1205,20 @@ func (db *DB) ApplyConfig(conf *config.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// getRetentionDuration returns the current retention duration in a thread-safe manner.
|
||||
func (db *DB) getRetentionDuration() int64 {
|
||||
db.retentionMtx.RLock()
|
||||
defer db.retentionMtx.RUnlock()
|
||||
return db.opts.RetentionDuration
|
||||
}
|
||||
|
||||
// getMaxBytes returns the current max bytes setting in a thread-safe manner.
|
||||
func (db *DB) getMaxBytes() int64 {
|
||||
db.retentionMtx.RLock()
|
||||
defer db.retentionMtx.RUnlock()
|
||||
return db.opts.MaxBytes
|
||||
}
|
||||
|
||||
// dbAppender wraps the DB's head appender and triggers compactions on commit
|
||||
// if necessary.
|
||||
type dbAppender struct {
|
||||
@ -1734,7 +1766,8 @@ func deletableBlocks(db *DB, blocks []*Block) map[ulid.ULID]struct{} {
|
||||
// set in the db options.
|
||||
func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) {
|
||||
// Time retention is disabled or no blocks to work with.
|
||||
if len(blocks) == 0 || db.opts.RetentionDuration == 0 {
|
||||
retentionDuration := db.getRetentionDuration()
|
||||
if len(blocks) == 0 || retentionDuration == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@ -1742,7 +1775,7 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
|
||||
for i, block := range blocks {
|
||||
// The difference between the first block and this block is greater than or equal to
|
||||
// the retention period so any blocks after that are added as deletable.
|
||||
if i > 0 && blocks[0].Meta().MaxTime-block.Meta().MaxTime >= db.opts.RetentionDuration {
|
||||
if i > 0 && blocks[0].Meta().MaxTime-block.Meta().MaxTime >= retentionDuration {
|
||||
for _, b := range blocks[i:] {
|
||||
deletable[b.meta.ULID] = struct{}{}
|
||||
}
|
||||
@ -1757,7 +1790,8 @@ func BeyondTimeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
|
||||
// set in the db options.
|
||||
func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struct{}) {
|
||||
// Size retention is disabled or no blocks to work with.
|
||||
if len(blocks) == 0 || db.opts.MaxBytes <= 0 {
|
||||
maxBytes := db.getMaxBytes()
|
||||
if len(blocks) == 0 || maxBytes <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@ -1768,7 +1802,7 @@ func BeyondSizeRetention(db *DB, blocks []*Block) (deletable map[ulid.ULID]struc
|
||||
blocksSize := db.Head().Size()
|
||||
for i, block := range blocks {
|
||||
blocksSize += block.Size()
|
||||
if blocksSize > db.opts.MaxBytes {
|
||||
if blocksSize > maxBytes {
|
||||
// Add this and all following blocks for deletion.
|
||||
for _, b := range blocks[i:] {
|
||||
deletable[b.meta.ULID] = struct{}{}
|
||||
|
||||
@ -42,6 +42,7 @@ import (
|
||||
"github.com/oklog/ulid/v2"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/promslog"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/atomic"
|
||||
@ -1719,6 +1720,73 @@ func TestSizeRetentionMetric(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRuntimeRetentionConfigChange tests that retention configuration can be
|
||||
// changed at runtime via ApplyConfig and that the retention logic properly
|
||||
// deletes blocks when retention is shortened. This test also ensures race-free
|
||||
// concurrent access to retention settings.
|
||||
func TestRuntimeRetentionConfigChange(t *testing.T) {
|
||||
const (
|
||||
initialRetentionDuration = int64(10 * time.Hour / time.Millisecond) // 10 hours
|
||||
shorterRetentionDuration = int64(1 * time.Hour / time.Millisecond) // 1 hour
|
||||
)
|
||||
|
||||
db := openTestDB(t, &Options{
|
||||
RetentionDuration: initialRetentionDuration,
|
||||
}, []int64{100})
|
||||
defer func() {
|
||||
require.NoError(t, db.Close())
|
||||
}()
|
||||
|
||||
nineHoursMs := int64(9 * time.Hour / time.Millisecond)
|
||||
nineAndHalfHoursMs := int64((9*time.Hour + 30*time.Minute) / time.Millisecond)
|
||||
blocks := []*BlockMeta{
|
||||
{MinTime: 0, MaxTime: 100}, // 10 hours old (beyond new retention)
|
||||
{MinTime: 100, MaxTime: 200}, // 9.9 hours old (beyond new retention)
|
||||
{MinTime: nineHoursMs, MaxTime: nineAndHalfHoursMs}, // 1 hour old (within new retention)
|
||||
{MinTime: nineAndHalfHoursMs, MaxTime: initialRetentionDuration}, // 0.5 hours old (within new retention)
|
||||
}
|
||||
|
||||
for _, m := range blocks {
|
||||
createBlock(t, db.Dir(), genSeries(10, 10, m.MinTime, m.MaxTime))
|
||||
}
|
||||
|
||||
// Reload blocks and verify all are loaded.
|
||||
require.NoError(t, db.reloadBlocks())
|
||||
require.Len(t, db.Blocks(), len(blocks), "expected all blocks to be loaded initially")
|
||||
|
||||
cfg := &config.Config{
|
||||
StorageConfig: config.StorageConfig{
|
||||
TSDBConfig: &config.TSDBConfig{
|
||||
Retention: &config.TSDBRetentionConfig{
|
||||
Time: model.Duration(shorterRetentionDuration),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
require.NoError(t, db.ApplyConfig(cfg), "ApplyConfig should succeed")
|
||||
|
||||
actualRetention := db.getRetentionDuration()
|
||||
require.Equal(t, shorterRetentionDuration, actualRetention, "retention duration should be updated")
|
||||
|
||||
expectedRetentionSeconds := (time.Duration(shorterRetentionDuration) * time.Millisecond).Seconds()
|
||||
actualRetentionSeconds := prom_testutil.ToFloat64(db.metrics.retentionDuration)
|
||||
require.Equal(t, expectedRetentionSeconds, actualRetentionSeconds, "retention duration metric should be updated")
|
||||
|
||||
require.NoError(t, db.reloadBlocks())
|
||||
|
||||
// Verify that blocks beyond the new retention were deleted.
|
||||
// We expect only the last 2 blocks to remain (those within 1 hour).
|
||||
actBlocks := db.Blocks()
|
||||
require.Len(t, actBlocks, 2, "expected old blocks to be deleted after retention change")
|
||||
|
||||
// Verify the remaining blocks are the newest ones.
|
||||
require.Equal(t, nineHoursMs, actBlocks[0].meta.MinTime, "first remaining block should be within retention")
|
||||
require.Equal(t, initialRetentionDuration, actBlocks[1].meta.MaxTime, "last remaining block should be the newest")
|
||||
|
||||
require.Positive(t, int(prom_testutil.ToFloat64(db.metrics.timeRetentionCount)), "time retention count should be incremented")
|
||||
}
|
||||
|
||||
func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) {
|
||||
db := openTestDB(t, nil, nil)
|
||||
defer func() {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user