Merge branch 'refs/heads/main' into skip-wait-for-discovery

This commit is contained in:
avilevy 2026-03-20 23:21:59 +00:00
commit 3018f35527
No known key found for this signature in database
80 changed files with 7983 additions and 3349 deletions

View File

@ -142,9 +142,7 @@ jobs:
enable_npm: false
# NOTE: Those tests are based on https://github.com/prometheus/compliance and
# are executed against the ./cmd/prometheus main package.
- run: go test -skip ${SKIP_TESTS} -v --tags=compliance ./compliance/...
env:
SKIP_TESTS: "TestRemoteWriteSender/prometheus/samples/rw2/start_timestamp*" # TODO(bwplotka): PROM-60
- run: go test -v --tags=compliance ./compliance/...
build:
name: Build Prometheus for common architectures

View File

@ -20,7 +20,8 @@ Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/
| v3.8 | 2025-11-06 | Jan Fajerski (GitHub: @jan--f) |
| v3.9 | 2025-12-18 | Bryan Boreham (GitHub: @bboreham) |
| v3.10 | 2026-02-05 | Ganesh Vernekar (GitHub: @codesome) |
| v3.11 | 2026-03-19 | **volunteer welcome** |
| v3.11 | 2026-03-25 | Julien Pivotto (GitHub: @roidelapluie) |
| v3.12 | 2026-05-06 | **volunteer welcome** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

View File

@ -271,6 +271,7 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
case "created-timestamp-zero-ingestion":
// NOTE(bwplotka): Once AppendableV1 is removed, there will be only the TSDB and agent flags.
c.scrape.EnableStartTimestampZeroIngestion = true
c.scrape.ParseST = true
c.web.STZeroIngestionEnabled = true
c.tsdb.EnableSTAsZeroSample = true
c.agent.EnableSTAsZeroSample = true
@ -279,16 +280,19 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
// This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental start timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental start timestamp zero ingestion enabled. OpenMetrics 1.0 parsing will parse <metric>_created metrics as ST instead of normal sample. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "xor2-encoding":
c.tsdb.EnableXOR2Encoding = true
logger.Info("Experimental XOR2 chunk encoding enabled.")
case "st-storage":
// TODO(bwplotka): Implement ST Storage as per PROM-60 and document this hidden feature flag.
c.scrape.ParseST = true
c.tsdb.EnableSTStorage = true
c.agent.EnableSTStorage = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental start timestamp storage enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental start timestamp storage enabled. OpenMetrics 1.0 parsing will parse <metric>_created metrics as ST instead of normal sample. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
logger.Info("Experimental delayed compaction is enabled.")
@ -601,7 +605,7 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, st-storage, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers, xor2-encoding. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
@ -671,6 +675,18 @@ func main() {
os.Exit(2)
}
// Set TSDB retention defaults from CLI flags before any config file is loaded.
// This makes CLI flags act as the default when no retention section is present.
cliRetentionDuration := cfg.tsdb.RetentionDuration
cliMaxBytes := cfg.tsdb.MaxBytes
if cliRetentionDuration == 0 && cliMaxBytes == 0 {
cliRetentionDuration = defaultRetentionDuration
}
config.DefaultTSDBRetentionConfig = config.TSDBRetentionConfig{
Time: cliRetentionDuration,
Size: cliMaxBytes,
}
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil {
@ -712,21 +728,11 @@ func main() {
logger.Warn("The option --storage.tsdb.block-reload-interval is set to a value less than 1s. Setting it to 1s to avoid overload.")
cfg.tsdb.BlockReloadInterval = model.Duration(1 * time.Second)
}
if cfgFile.StorageConfig.TSDBConfig != nil {
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
if cfgFile.StorageConfig.TSDBConfig.Retention != nil {
if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 {
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
}
if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 {
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
}
if cfgFile.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
}
}
}
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
// Set Go runtime parameters before we get too far into initialization.
updateGoGC(cfgFile, logger)
@ -778,11 +784,6 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
if cfg.tsdb.RetentionDuration < 0 {
y, err := model.ParseDuration("100y")
@ -1031,8 +1032,29 @@ func main() {
reloaders := []reloader{
{
name: "db_storage",
reloader: localStorage.ApplyConfig,
name: "db_storage",
reloader: func() func(*config.Config) error {
lastTSDBRetention := config.TSDBRetentionConfig{}
return func(cfg *config.Config) error {
err := localStorage.ApplyConfig(cfg)
if err != nil || agentMode || cfg.StorageConfig.TSDBConfig == nil || cfg.StorageConfig.TSDBConfig.Retention == nil {
return err
}
curr := cfg.StorageConfig.TSDBConfig.Retention
if *curr == lastTSDBRetention {
return nil
}
logger.Info("TSDB retention updated",
"duration", curr.Time,
"size", curr.Size,
"percentage", curr.Percentage,
)
lastTSDBRetention = *curr
return nil
}
}(),
}, {
name: "remote_storage",
reloader: remoteStorage.ApplyConfig,
@ -2009,6 +2031,7 @@ type tsdbOptions struct {
BlockReloadInterval model.Duration
EnableSTAsZeroSample bool
EnableSTStorage bool
EnableXOR2Encoding bool
StaleSeriesCompactionThreshold float64
}
@ -2039,6 +2062,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
FeatureRegistry: features.DefaultRegistry,
EnableSTAsZeroSample: opts.EnableSTAsZeroSample,
EnableSTStorage: opts.EnableSTStorage,
EnableXOR2Encoding: opts.EnableXOR2Encoding,
StaleSeriesCompactionThreshold: opts.StaleSeriesCompactionThreshold,
}
}

View File

@ -251,6 +251,8 @@
"exemplar_storage": false,
"isolation": true,
"native_histograms": true,
"st_storage": false,
"xor2_encoding": false,
"use_uncached_io": false
},
"ui": {

View File

@ -2,7 +2,7 @@ module compliance
go 1.25.0
require github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275
require github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect

View File

@ -30,8 +30,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275 h1:NLTtFqM00EuqtisYX9P+hQkjoxNxsR2oUQWDluyD2Xw=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275/go.mod h1:VEPZGvpSBbzTKc5acnBj9ng4gfo1DZ4qBsCQnoNFiSc=
github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e h1:tT/KBv0aSFq4AElo/bSVvUd+yNKj72hkRsyiKU45nIQ=
github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e/go.mod h1:VEPZGvpSBbzTKc5acnBj9ng4gfo1DZ4qBsCQnoNFiSc=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f h1:ERPCnBglv9Z4IjkEBTNbcHmZPlryMldXVWLkk7TeBIY=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f/go.mod h1:7hcXiGf9AXIKW2ehWWzxkvRYJTGmc2StUIJ8mprfxjg=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=

View File

@ -53,7 +53,9 @@ scrape_configs:
var scrapeConfigTmpl = template.Must(template.New("config").Parse(scrapeConfigTemplate))
type internalPrometheus struct{}
type internalPrometheus struct {
agentMode bool
}
// Name returns the identifier under which this sender is reported in the
// remote-write compliance test output.
func (p internalPrometheus) Name() string {
	return "internal-prometheus"
}
@ -74,20 +76,33 @@ func (p internalPrometheus) Run(ctx context.Context, opts sender.Options) error
}
defer os.RemoveAll(dir)
return sender.RunCommand(ctx, "../cmd/prometheus", nil,
"go", "run", ".",
args := []string{
"run", ".",
"--web.listen-address=0.0.0.0:0",
fmt.Sprintf("--storage.tsdb.path=%v", dir),
fmt.Sprintf("--config.file=%s", configFile),
// Set important flags for the full remote write compliance:
"--enable-feature=st-storage",
)
}
if p.agentMode {
args = append(args, fmt.Sprintf("--storage.agent.path=%v", dir), "--agent")
} else {
args = append(args, fmt.Sprintf("--storage.tsdb.path=%v", dir))
}
return sender.RunCommand(ctx, "../cmd/prometheus", nil, "go", args...)
}
var _ sender.Sender = internalPrometheus{}
// TestRemoteWriteSender runs remote write sender compliance tests defined in
// https://github.com/prometheus/compliance/tree/main/remotewrite/sender
// https://github.com/prometheus/compliance/tree/main/remotewrite/sender against
// both agent and server modes.
func TestRemoteWriteSender(t *testing.T) {
sender.RunTests(t, internalPrometheus{}, sender.ComplianceTests())
t.Run("mode=server", func(t *testing.T) {
t.Parallel()
sender.RunTests(t, internalPrometheus{}, sender.ComplianceTests())
})
t.Run("mode=agent", func(t *testing.T) {
t.Parallel()
sender.RunTests(t, internalPrometheus{agentMode: true}, sender.ComplianceTests())
})
}

View File

@ -83,6 +83,13 @@ func Load(s string, logger *slog.Logger) (*Config, error) {
return nil, err
}
// When the config body is empty, UnmarshalYAML is never called, so
// TSDBConfig may still be nil.
if cfg.StorageConfig.TSDBConfig == nil {
retention := DefaultTSDBRetentionConfig
cfg.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
}
b := labels.NewScratchBuilder(0)
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
newV := os.Expand(v.Value, func(s string) string {
@ -276,6 +283,9 @@ var (
// For backwards compatibility.
LabelNamePreserveMultipleUnderscores: true,
}
// DefaultTSDBRetentionConfig is the default TSDB retention configuration.
DefaultTSDBRetentionConfig TSDBRetentionConfig
)
// Config is the top-level configuration for Prometheus's config files.
@ -405,6 +415,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(any) error) error {
c.Runtime = DefaultRuntimeConfig
}
// If no storage.tsdb section is present, TSDBConfig is nil and its
// UnmarshalYAML never runs. Inject the default retention here.
if c.StorageConfig.TSDBConfig == nil {
retention := DefaultTSDBRetentionConfig
c.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
}
for _, rf := range c.RuleFiles {
if !patRulePath.MatchString(rf) {
return fmt.Errorf("invalid rule file path %q", rf)
@ -1097,6 +1114,22 @@ type TSDBRetentionConfig struct {
Percentage uint `yaml:"percentage,omitempty"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
// It resets the receiver, decodes the YAML body, and then validates that the
// retention size and percentage fall within their allowed ranges.
func (t *TSDBRetentionConfig) UnmarshalYAML(unmarshal func(any) error) error {
	// Start from the zero value so fields from a previous decode never leak in.
	*t = TSDBRetentionConfig{}
	// The alias type drops this custom unmarshaler, avoiding infinite recursion.
	type plain TSDBRetentionConfig
	err := unmarshal((*plain)(t))
	if err != nil {
		return err
	}
	switch {
	case t.Size < 0:
		return fmt.Errorf("'storage.tsdb.retention.size' must be greater than or equal to 0, got %v", t.Size)
	case t.Percentage > 100:
		return fmt.Errorf("'storage.tsdb.retention.percentage' must be in the range [0, 100], got %v", t.Percentage)
	}
	return nil
}
// TSDBConfig configures runtime reloadable configuration options.
type TSDBConfig struct {
// OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted
@ -1127,6 +1160,11 @@ func (t *TSDBConfig) UnmarshalYAML(unmarshal func(any) error) error {
t.OutOfOrderTimeWindow = time.Duration(t.OutOfOrderTimeWindowFlag).Milliseconds()
if t.Retention == nil {
retention := DefaultTSDBRetentionConfig
t.Retention = &retention
}
return nil
}

View File

@ -20,9 +20,10 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
OTLPConfig: DefaultOTLPConfig,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
OTLPConfig: DefaultOTLPConfig,
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
RuleFiles: []string{
"testdata/first.rules",
"testdata/rules/second.rules",

View File

@ -2626,6 +2626,22 @@ var expectedErrors = []struct {
filename: "stackit_endpoint.bad.yml",
errMsg: "invalid endpoint",
},
{
filename: "tsdb_retention_time.bad.yml",
errMsg: `not a valid duration string: "-1h"`,
},
{
filename: "tsdb_retention_size.bad.yml",
errMsg: `'storage.tsdb.retention.size' must be greater than or equal to 0`,
},
{
filename: "tsdb_retention_percentage.bad.yml",
errMsg: `'storage.tsdb.retention.percentage' must be in the range [0, 100]`,
},
{
filename: "tsdb_retention_percentage_negative.bad.yml",
errMsg: "cannot unmarshal !!int `-1` into uint",
},
}
func TestBadConfigs(t *testing.T) {
@ -2649,6 +2665,8 @@ func TestEmptyConfig(t *testing.T) {
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
retention := DefaultTSDBRetentionConfig
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
require.Equal(t, exp, *c)
require.Equal(t, 75, c.Runtime.GoGC)
}
@ -2700,6 +2718,10 @@ func TestGlobalConfig(t *testing.T) {
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
// TSDBConfig is always injected by Config.UnmarshalYAML even when no
// storage.tsdb section is present, so the expected config must include it.
retention := DefaultTSDBRetentionConfig
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
require.Equal(t, exp, *c)
})

View File

@ -18,8 +18,9 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
RuleFiles: []string{
"testdata\\first.rules",
"testdata\\rules\\second.rules",

View File

@ -0,0 +1,4 @@
# Bad config: retention percentage above the allowed [0, 100] range.
storage:
  tsdb:
    retention:
      percentage: 101

View File

@ -0,0 +1,4 @@
# Bad config: negative retention percentage cannot be decoded into a uint.
storage:
  tsdb:
    retention:
      percentage: -1

View File

@ -0,0 +1,4 @@
# Bad config: retention size must be greater than or equal to 0.
storage:
  tsdb:
    retention:
      size: -1GB

View File

@ -0,0 +1,4 @@
# Bad config: negative retention time is not a valid duration string.
storage:
  tsdb:
    retention:
      time: -1h

View File

@ -298,7 +298,10 @@ func newCredential(cfg SDConfig, policyClientOptions policy.ClientOptions) (azco
}
credential = azcore.TokenCredential(workloadIdentityCredential)
case authMethodManagedIdentity:
options := &azidentity.ManagedIdentityCredentialOptions{ClientOptions: policyClientOptions, ID: azidentity.ClientID(cfg.ClientID)}
options := &azidentity.ManagedIdentityCredentialOptions{ClientOptions: policyClientOptions}
if cfg.ClientID != "" {
options.ID = azidentity.ClientID(cfg.ClientID)
}
managedIdentityCredential, err := azidentity.NewManagedIdentityCredential(options)
if err != nil {
return nil, err

View File

@ -24,6 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake"
@ -490,6 +491,27 @@ func TestNewAzureResourceFromID(t *testing.T) {
}
}
func TestNewCredentialManagedIdentity(t *testing.T) {
	// An empty ClientID (system-assigned identity) must yield a valid
	// credential. Previously the empty string was wrapped as
	// azidentity.ClientID(""), which is not nil and caused the Azure SDK to
	// look up a non-existent user-assigned identity instead of falling back
	// to the system-assigned one. A non-empty ClientID (user-assigned
	// identity) must keep working as well.
	for _, clientID := range []string{"", "00000000-0000-0000-0000-000000000000"} {
		conf := SDConfig{
			AuthenticationMethod: authMethodManagedIdentity,
			ClientID:             clientID,
		}
		credential, err := newCredential(conf, policy.ClientOptions{})
		require.NoError(t, err)
		require.NotNil(t, credential)
	}
}
func TestAzureRefresh(t *testing.T) {
tests := []struct {
scenario string

View File

@ -38,8 +38,10 @@ const (
hetznerLabelHcloudImageOSVersion = hetznerHcloudLabelPrefix + "image_os_version"
hetznerLabelHcloudImageOSFlavor = hetznerHcloudLabelPrefix + "image_os_flavor"
hetznerLabelHcloudPrivateIPv4 = hetznerHcloudLabelPrefix + "private_ipv4_"
hetznerLabelHcloudDatacenterLocation = hetznerHcloudLabelPrefix + "datacenter_location"
hetznerLabelHcloudDatacenterLocationNetworkZone = hetznerHcloudLabelPrefix + "datacenter_location_network_zone"
hetznerLabelHcloudLocation = hetznerHcloudLabelPrefix + "location"
hetznerLabelHcloudLocationNetworkZone = hetznerHcloudLabelPrefix + "location_network_zone"
hetznerLabelHcloudDatacenterLocation = hetznerHcloudLabelPrefix + "datacenter_location" // Label name kept for backward compatibility
hetznerLabelHcloudDatacenterLocationNetworkZone = hetznerHcloudLabelPrefix + "datacenter_location_network_zone" // Label name kept for backward compatibility
hetznerLabelHcloudCPUCores = hetznerHcloudLabelPrefix + "cpu_cores"
hetznerLabelHcloudCPUType = hetznerHcloudLabelPrefix + "cpu_type"
hetznerLabelHcloudMemoryGB = hetznerHcloudLabelPrefix + "memory_size_gb"
@ -98,13 +100,14 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
hetznerLabelRole: model.LabelValue(HetznerRoleHcloud),
hetznerLabelServerID: model.LabelValue(strconv.FormatInt(server.ID, 10)),
hetznerLabelServerName: model.LabelValue(server.Name),
hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelPublicIPv4: model.LabelValue(server.PublicNet.IPv4.IP.String()),
hetznerLabelPublicIPv6Network: model.LabelValue(server.PublicNet.IPv6.Network.String()),
hetznerLabelServerStatus: model.LabelValue(server.Status),
hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Datacenter.Location.Name), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Datacenter.Location.NetworkZone), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelHcloudLocation: model.LabelValue(server.Location.Name),
hetznerLabelHcloudLocationNetworkZone: model.LabelValue(server.Location.NetworkZone),
hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Location.Name), // Label name kept for backward compatibility
hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Location.NetworkZone), // Label name kept for backward compatibility
hetznerLabelHcloudType: model.LabelValue(server.ServerType.Name),
hetznerLabelHcloudCPUCores: model.LabelValue(strconv.Itoa(server.ServerType.Cores)),
hetznerLabelHcloudCPUType: model.LabelValue(server.ServerType.CPUType),
@ -114,6 +117,12 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
model.AddressLabel: model.LabelValue(net.JoinHostPort(server.PublicNet.IPv4.IP.String(), strconv.FormatUint(uint64(d.port), 10))),
}
// [hcloud.Server.Datacenter] is deprecated and will be removed after 1 July 2026.
// See https://docs.hetzner.cloud/changelog#2025-12-16-phasing-out-datacenters
if server.Datacenter != nil { //nolint:staticcheck
labels[hetznerLabelDatacenter] = model.LabelValue(server.Datacenter.Name) //nolint:staticcheck
}
if server.Image != nil {
labels[hetznerLabelHcloudImageName] = model.LabelValue(server.Image.Name)
labels[hetznerLabelHcloudImageDescription] = model.LabelValue(server.Image.Description)

View File

@ -69,6 +69,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_hcloud_image_description": model.LabelValue("Ubuntu 20.04 Standard 64 bit"),
"__meta_hetzner_hcloud_image_os_flavor": model.LabelValue("ubuntu"),
"__meta_hetzner_hcloud_image_os_version": model.LabelValue("20.04"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("1"),
@ -93,6 +95,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_hcloud_image_description": model.LabelValue("Ubuntu 20.04 Standard 64 bit"),
"__meta_hetzner_hcloud_image_os_flavor": model.LabelValue("ubuntu"),
"__meta_hetzner_hcloud_image_os_version": model.LabelValue("20.04"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("2"),
@ -114,6 +118,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc14"),
"__meta_hetzner_public_ipv4": model.LabelValue("1.2.3.6"),
"__meta_hetzner_public_ipv6_network": model.LabelValue("2001:db7::/64"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("2"),

View File

@ -36,7 +36,7 @@ const (
hetznerLabelServerID = hetznerLabelPrefix + "server_id"
hetznerLabelServerName = hetznerLabelPrefix + "server_name"
hetznerLabelServerStatus = hetznerLabelPrefix + "server_status"
hetznerLabelDatacenter = hetznerLabelPrefix + "datacenter"
hetznerLabelDatacenter = hetznerLabelPrefix + "datacenter" // Label name kept for backward compatibility
hetznerLabelPublicIPv4 = hetznerLabelPrefix + "public_ipv4"
hetznerLabelPublicIPv6Network = hetznerLabelPrefix + "public_ipv6_network"
)

View File

@ -124,6 +124,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 1,
"name": "fsn1-dc8",
@ -244,6 +254,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 2,
"name": "fsn1-dc14",
@ -365,6 +385,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 2,
"name": "fsn1-dc14",

View File

@ -34,9 +34,10 @@ import (
)
const (
hetznerRobotLabelPrefix = hetznerLabelPrefix + "robot_"
hetznerLabelRobotProduct = hetznerRobotLabelPrefix + "product"
hetznerLabelRobotCancelled = hetznerRobotLabelPrefix + "cancelled"
hetznerRobotLabelPrefix = hetznerLabelPrefix + "robot_"
hetznerLabelRobotDatacenter = hetznerRobotLabelPrefix + "datacenter"
hetznerLabelRobotProduct = hetznerRobotLabelPrefix + "product"
hetznerLabelRobotCancelled = hetznerRobotLabelPrefix + "cancelled"
)
var userAgent = version.PrometheusUserAgent()
@ -105,14 +106,15 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
targets := make([]model.LabelSet, len(servers))
for i, server := range servers {
labels := model.LabelSet{
hetznerLabelRole: model.LabelValue(HetznerRoleRobot),
hetznerLabelServerID: model.LabelValue(strconv.Itoa(server.Server.ServerNumber)),
hetznerLabelServerName: model.LabelValue(server.Server.ServerName),
hetznerLabelDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)),
hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP),
hetznerLabelServerStatus: model.LabelValue(server.Server.Status),
hetznerLabelRobotProduct: model.LabelValue(server.Server.Product),
hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)),
hetznerLabelRole: model.LabelValue(HetznerRoleRobot),
hetznerLabelServerID: model.LabelValue(strconv.Itoa(server.Server.ServerNumber)),
hetznerLabelServerName: model.LabelValue(server.Server.ServerName),
hetznerLabelDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)), // Label name kept for backward compatibility
hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP),
hetznerLabelServerStatus: model.LabelValue(server.Server.Status),
hetznerLabelRobotDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)),
hetznerLabelRobotProduct: model.LabelValue(server.Server.Product),
hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)),
model.AddressLabel: model.LabelValue(net.JoinHostPort(server.Server.ServerIP, strconv.FormatUint(uint64(d.port), 10))),
}

View File

@ -64,19 +64,21 @@ func TestRobotSDRefresh(t *testing.T) {
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.123"),
"__meta_hetzner_public_ipv6_network": model.LabelValue("2a01:4f8:111:4221::/64"),
"__meta_hetzner_datacenter": model.LabelValue("nbg1-dc1"),
"__meta_hetzner_robot_datacenter": model.LabelValue("nbg1-dc1"),
"__meta_hetzner_robot_product": model.LabelValue("DS 3000"),
"__meta_hetzner_robot_cancelled": model.LabelValue("false"),
},
{
"__address__": model.LabelValue("123.123.123.124:80"),
"__meta_hetzner_role": model.LabelValue("robot"),
"__meta_hetzner_server_id": model.LabelValue("421"),
"__meta_hetzner_server_name": model.LabelValue("server2"),
"__meta_hetzner_server_status": model.LabelValue("in process"),
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.124"),
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_product": model.LabelValue("X5"),
"__meta_hetzner_robot_cancelled": model.LabelValue("true"),
"__address__": model.LabelValue("123.123.123.124:80"),
"__meta_hetzner_role": model.LabelValue("robot"),
"__meta_hetzner_server_id": model.LabelValue("421"),
"__meta_hetzner_server_name": model.LabelValue("server2"),
"__meta_hetzner_server_status": model.LabelValue("in process"),
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.124"),
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_product": model.LabelValue("X5"),
"__meta_hetzner_robot_cancelled": model.LabelValue("true"),
},
} {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {

View File

@ -59,7 +59,7 @@ The Prometheus monitoring server
| <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, st-storage, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers, xor2-encoding. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--agent</code> | Run Prometheus in 'Agent mode'. | |
| <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

View File

@ -2238,7 +2238,10 @@ The following meta labels are available on all targets during [relabeling](#rela
* `__meta_hetzner_server_status`: the status of the server
* `__meta_hetzner_public_ipv4`: the public ipv4 address of the server
* `__meta_hetzner_public_ipv6_network`: the public ipv6 network (/64) of the server
* `__meta_hetzner_datacenter`: the datacenter of the server
Note that the `__meta_hetzner_datacenter` label is deprecated for both roles `robot` and `hcloud`:
- For the `robot` role, the replacement label is `__meta_hetzner_robot_datacenter`.
- For the `hcloud` role, the label will be removed after 1 July 2026. For more details, see the [changelog](https://docs.hetzner.cloud/changelog#2025-12-16-phasing-out-datacenters).
The labels below are only available for targets with `role` set to `hcloud`:
@ -2246,8 +2249,10 @@ The labels below are only available for targets with `role` set to `hcloud`:
* `__meta_hetzner_hcloud_image_description`: the description of the server image
* `__meta_hetzner_hcloud_image_os_flavor`: the OS flavor of the server image
* `__meta_hetzner_hcloud_image_os_version`: the OS version of the server image
* `__meta_hetzner_hcloud_datacenter_location`: the location of the server
* `__meta_hetzner_hcloud_datacenter_location_network_zone`: the network zone of the server
* `__meta_hetzner_hcloud_location`: the location of the server
* `__meta_hetzner_hcloud_location_network_zone`: the network zone of the server
* `__meta_hetzner_hcloud_datacenter_location`: the location of the server (deprecated in favor of `__meta_hetzner_hcloud_location`)
* `__meta_hetzner_hcloud_datacenter_location_network_zone`: the network zone of the server (deprecated in favor of `__meta_hetzner_hcloud_location_network_zone`)
* `__meta_hetzner_hcloud_server_type`: the type of the server
* `__meta_hetzner_hcloud_cpu_cores`: the CPU cores count of the server
* `__meta_hetzner_hcloud_cpu_type`: the CPU type of the server (shared or dedicated)
@ -2259,6 +2264,7 @@ The labels below are only available for targets with `role` set to `hcloud`:
The labels below are only available for targets with `role` set to `robot`:
* `__meta_hetzner_robot_datacenter`: the datacenter of the server
* `__meta_hetzner_robot_product`: the product of the server
* `__meta_hetzner_robot_cancelled`: the server cancellation status
@ -3871,9 +3877,9 @@ with this feature.
# or when a compaction completes, whichever comes first.
[ retention: <retention> ] :
# How long to retain samples in storage. If neither this option nor the size option
# is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms.
# is set, the retention time defaults to 15d. Setting this to 0 disables time-based retention.
# This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.time.
[ time: <duration> | default = 15d ]
[ time: <duration> ]
# Maximum number of bytes that can be stored for blocks. A unit is required,
# supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B.

View File

@ -77,6 +77,30 @@ Therefore, when `created-timestamp-zero-ingestion` is enabled Prometheus changes
Besides enabling this feature in Prometheus, start timestamps need to be exposed by the application being scraped.
## Start timestamp (ST) native storage
`--enable-feature=st-storage`
Enables the storage of start timestamps (ST) per sample, through WAL, TSDB/Agent and Remote-Write 2.0. This option
allows preserving the exact ST value as it was presented from scrape and receive protocols. In the future this feature
is meant to be a replacement of `created-timestamp-zero-ingestion` which injects synthetic 0 samples.
Currently, Prometheus supports start timestamps on:
* `PrometheusProto`
* `OpenMetrics1.0.0`
`PrometheusProto` is recommended, due to efficiency of ST passing.
Besides enabling this feature in Prometheus, start timestamps need to be exposed by the application being scraped.
> NOTE: This is an experimental feature with known limitations until fully implemented.
> * It introduces a new WAL record type (SamplesV2) that can only be replayed by Prometheus 3.11 or later versions.
> * For persistent storage support (TSDB blocks), you need to manually opt-in for XOR2 chunk format ([`xor2-encoding` flag](#xor2-chunk-encoding)).
> This might change later once we finish experimentation phase with XOR2.
> * ST for native histograms and NHCBs are not yet implemented (see [#18315](https://github.com/prometheus/prometheus/issues/18315)).
> * PromQL use of ST is out of scope of this feature.
## Concurrent evaluation of independent rules
`--enable-feature=concurrent-rule-eval`
@ -306,6 +330,17 @@ This is currently implemented using direct I/O.
For more details, see the [proposal](https://github.com/prometheus/proposals/pull/45).
## XOR2 chunk encoding
`--enable-feature=xor2-encoding`
> WARNING: This is a highly experimental and risky setting:
> * Chunks encoded with XOR2 **cannot be read by older Prometheus versions** that do not support the encoding. Once enabled and data is written, you need to **manually delete blocks from the disk**, otherwise Prometheus will return an error on all queries.
> * We are still experimenting on the final encoding. As of now this encoding can change in any Prometheus version. All your persistent block data will be lost between versions.
> * This encoding is new, meaning downstream tools and LTS systems might not support it yet (e.g. Thanos sidecar uploaded blocks).
This setting enables the new XOR2 chunk encoding for float samples, which provides better disk compression than the default XOR encoding for typical Prometheus workloads. This format also allows storing the Start Timestamp (ST).
## Extended Range Selectors
`--enable-feature=promql-extended-range-selectors`

View File

@ -219,7 +219,7 @@ to their original value. Histogram samples in the input vector are ignored silen
## `histogram_avg()`
`histogram_avg(v instant-vector)` returns the arithmetic average of observed
values stored in each histogram sample in `v`. Float samples are ignored and do
values stored in each native histogram sample in `v`. Float samples are ignored and do
not show up in the returned vector.
Use `histogram_avg` as demonstrated below to compute the average request duration
@ -236,11 +236,11 @@ Which is equivalent to the following query:
## `histogram_count()` and `histogram_sum()`
`histogram_count(v instant-vector)` returns the count of observations stored in
each histogram sample in `v`. Float samples are ignored and do not show up in
each native histogram sample in `v`. Float samples are ignored and do not show up in
the returned vector.
Similarly, `histogram_sum(v instant-vector)` returns the sum of observations
stored in each histogram sample.
stored in each native histogram sample.
Use `histogram_count` in the following way to calculate a rate of observations
(in this case corresponding to “requests per second”) from a series of
@ -453,14 +453,14 @@ histogram_quantiles(sum(rate(foo[1m])), "quantile", 0.9, 0.99)
## `histogram_stddev()` and `histogram_stdvar()`
`histogram_stddev(v instant-vector)` returns the estimated standard deviation
of observations for each histogram sample in `v`. For this estimation, all observations
of observations for each native histogram sample in `v`. For this estimation, all observations
in a bucket are assumed to have the value of the mean of the bucket boundaries. For
the zero bucket and for buckets with custom boundaries, the arithmetic mean is used.
For the usual exponential buckets, the geometric mean is used. Float samples are ignored
and do not show up in the returned vector.
Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
variance of observations for each histogram sample in `v`.
variance of observations for each native histogram sample in `v`.
## `hour()`

View File

@ -105,7 +105,9 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo
enableCompression: true,
validationScheme: model.UTF8Validation,
symbolTable: labels.NewSymbolTable(),
appendMetadataToWAL: true, // Tests assumes it's enabled, unless explicitly turned off.
// Tests assume those features are enabled, unless explicitly turned off.
appendMetadataToWAL: true,
parseST: true,
}
for _, o := range opts {
o(sl)

View File

@ -115,8 +115,26 @@ type Options struct {
// Option to enable the ingestion of the created timestamp as a synthetic zero sample.
// See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md
//
// NOTE: This option has no effect for AppenderV2 and will be removed with the AppenderV1
// removal.
EnableStartTimestampZeroIngestion bool
// ParseST controls if ST should be parsed and appended from the scrape formats.
// This should be by default true, but it's opt-in for OpenMetrics (OM) 1.0 reasons and might be moved
// to OM 1.0 only flow.
//
// Specifically for OpenMetrics 1.0 flow, it can have some additional effects that might not be desired for non-ST users:
//
// * OpenMetrics 1.0 <metric>_created series will be parsed as ST instead of normal sample. Could be breaking
// if downstream user depends on _created metric. TODO(bwplotka): Add "preserveOMLines" hidden option?
// * Add relatively small (but still) overhead.
// * Can yield wrong ST values in rare edge cases (unknown metadata and metric name collisions).
//
// This only applies to AppenderV2 flow (Prometheus default).
// TODO: Move this option to OM1 parser and use only on OM1 flow.
ParseST bool
// EnableTypeAndUnitLabels represents type-and-unit-labels feature flag.
EnableTypeAndUnitLabels bool

View File

@ -768,6 +768,7 @@ func TestManagerSTZeroIngestion(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: testSTZeroIngest,
ParseST: testSTZeroIngest,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()
@ -954,6 +955,7 @@ func TestManagerSTZeroIngestionHistogram(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: tc.enableSTZeroIngestion,
ParseST: tc.enableSTZeroIngestion,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()
@ -1066,6 +1068,7 @@ func TestNHCBAndSTZeroIngestion(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: true,
ParseST: true,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()

View File

@ -870,6 +870,7 @@ type scrapeLoop struct {
// Options from scrape.Options.
enableSTZeroIngestion bool
parseST bool // Used by AppenderV2 only.
enableTypeAndUnitLabels bool
reportExtraMetrics bool
appendMetadataToWAL bool
@ -1224,7 +1225,12 @@ func newScrapeLoop(opts scrapeLoopOptions) *scrapeLoop {
validationScheme: opts.sp.config.MetricNameValidationScheme,
// scrape.Options.
enableSTZeroIngestion: opts.sp.options.EnableStartTimestampZeroIngestion,
enableSTZeroIngestion: opts.sp.options.EnableStartTimestampZeroIngestion,
// parseST was added recently. Before EnableStartTimestampZeroIngestion
// was enabling parsing ST. For non-Prometheus users of the scrape
// manager, we ensure appenderV2 parseST is set on EnableStartTimestampZeroIngestion
// This will be removed when EnableStartTimestampZeroIngestion is removed.
parseST: opts.sp.options.ParseST || opts.sp.options.EnableStartTimestampZeroIngestion,
enableTypeAndUnitLabels: opts.sp.options.EnableTypeAndUnitLabels,
appendMetadataToWAL: opts.sp.options.AppendMetadata,
passMetadataInContext: opts.sp.options.PassMetadataInContext,
@ -1253,9 +1259,8 @@ func (sl *scrapeLoop) getScrapeOffset() time.Duration {
func (sl *scrapeLoop) run(errc chan<- error) {
var (
last time.Time
alignedScrapeTime = time.Now().Round(0)
ticker = time.NewTicker(sl.interval)
last time.Time
ticker = time.NewTicker(sl.interval)
)
defer func() {
if sl.scrapeOnShutdown {
@ -1282,6 +1287,10 @@ func (sl *scrapeLoop) run(errc chan<- error) {
}
}
// Reset the ticker so target scrape times are aligned to the offset+intervals.
ticker.Reset(sl.interval)
alignedScrapeTime := time.Now().Round(0)
for {
select {
case <-sl.ctx.Done():

View File

@ -102,7 +102,7 @@ func (sl *scrapeLoopAppenderV2) append(b []byte, contentType string, ts time.Tim
IgnoreNativeHistograms: !sl.enableNativeHistogramScraping,
ConvertClassicHistogramsToNHCB: sl.convertClassicHistToNHCB,
KeepClassicOnClassicAndNativeHistograms: sl.alwaysScrapeClassicHist,
OpenMetricsSkipSTSeries: sl.enableSTZeroIngestion,
OpenMetricsSkipSTSeries: sl.parseST,
FallbackContentType: sl.fallbackScrapeProtocol,
})
if p == nil {
@ -254,7 +254,7 @@ loop:
}
st := int64(0)
if sl.enableSTZeroIngestion {
if sl.parseST {
// p.StartTimestamp() tend to be expensive (e.g. OM1). Do it only if we care.
st = p.StartTimestamp()
}

View File

@ -24,6 +24,7 @@ import (
"log/slog"
"maps"
"math"
"net"
"net/http"
"net/http/httptest"
"net/url"
@ -51,6 +52,7 @@ import (
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.uber.org/atomic"
"go.uber.org/goleak"
"go.yaml.in/yaml/v2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -69,6 +71,7 @@ import (
"github.com/prometheus/prometheus/util/pool"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil"
"github.com/prometheus/prometheus/util/testutil/synctest"
)
func TestMain(m *testing.M) {
@ -1546,6 +1549,14 @@ func TestPromTextToProto(t *testing.T) {
require.Equal(t, "promhttp_metric_handler_requests_total", got[236])
}
// seriesPerHistogramFor100HistsWithExemplars returns the number of series each
// test histogram expands into for the 100-histograms-with-exemplars fixture.
//
// With AppenderV2, parseST consumes the _created line as the start timestamp
// instead of appending it as a regular sample, so one fewer series is stored.
func seriesPerHistogramFor100HistsWithExemplars(appV2 bool) int {
	const (
		seriesWithSTParsing    = 23 // _created line becomes ST, not a sample.
		seriesWithoutSTParsing = 24
	)
	if !appV2 {
		return seriesWithoutSTParsing
	}
	return seriesWithSTParsing
}
// TestScrapeLoopAppend_WithStorage tests appends and storage integration for the
// large input files that are also used in benchmarks.
func TestScrapeLoopAppend_WithStorage(t *testing.T) {
@ -1631,8 +1642,13 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
name: "100HistsWithExemplars",
parsableText: makeTestHistogramsWithExemplars(100),
expectedSamplesLen: 24 * 100,
expectedSamplesLen: seriesPerHistogramFor100HistsWithExemplars(appV2) * 100,
testAppendedSamples: func(t *testing.T, committed []sample) {
st := int64(0)
if appV2 {
st = 1726839813016
}
// Verify a few samples.
m := metadata.Metadata{Type: model.MetricTypeHistogram, Help: "RPC latency distributions."}
testutil.RequireEqual(t, sample{
@ -1642,7 +1658,7 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
}
return "rpc_durations_histogram0_seconds"
}(),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), V: 15, T: timestamp.FromTime(ts),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), V: 15, ST: st, T: timestamp.FromTime(ts),
ES: []exemplar.Exemplar{
{Labels: labels.FromStrings("dummyID", "9818"), Value: 0.0002791130914009552, Ts: 1726839814982, HasTs: true},
},
@ -1654,17 +1670,24 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
}
return "rpc_durations_histogram49_seconds"
}(),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram49_seconds_sum"), V: -8.452185437166741e-05, T: timestamp.FromTime(ts),
}, committed[24*50-3])
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram49_seconds_sum"), V: -8.452185437166741e-05, ST: st, T: timestamp.FromTime(ts),
}, committed[seriesPerHistogramFor100HistsWithExemplars(appV2)*49+21])
// This series does not have metadata, nor metric family, because of isSeriesPartOfFamily bug and OpenMetric 1.0 limitations around _created series.
// TODO(bwplotka): Fix with https://github.com/prometheus/prometheus/issues/17900
testutil.RequireEqual(t, sample{
L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_created"), V: 1.726839813016302e+09, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
if !appV2 {
// This series does not have metadata, nor metric family, because of isSeriesPartOfFamily bug and OpenMetric 1.0 limitations around _created series.
// TODO(bwplotka): Fix with https://github.com/prometheus/prometheus/issues/17900
testutil.RequireEqual(t, sample{
L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_created"), V: 1.726839813016302e+09, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
} else {
testutil.RequireEqual(t, sample{
MF: "rpc_durations_histogram99_seconds",
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_count"), V: 15, ST: st, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
}
},
testExemplars: func(t *testing.T, er []exemplar.QueryResult) {
// 12 out of 24 histogram series have exemplars.
// 12 out of 23/24 histogram series have exemplars.
require.Len(t, er, 12*100)
testutil.RequireEqual(t, exemplar.QueryResult{
SeriesLabels: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"),
@ -2901,6 +2924,11 @@ func TestScrapeLoopAppend(t *testing.T) {
}
func testScrapeLoopAppend(t *testing.T, appV2 bool) {
st := int64(0)
if appV2 {
st = 111111001
}
for _, test := range []struct {
title string
alwaysScrapeClassicHist bool
@ -2953,6 +2981,32 @@ func testScrapeLoopAppend(t *testing.T, appV2 bool) {
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("a", "abc"), Value: 1, Ts: 10000000, HasTs: true}},
}},
},
{
title: "Metric with ST",
scrapeText: `# TYPE metric counter
metric_total{n="1"} 1.1
metric_created{n="1"} 9999.999
# EOF`,
contentType: "application/openmetrics-text",
samples: func() []sample {
if !appV2 {
return []sample{
{
L: labels.FromStrings("__name__", "metric_total", "n", "1"),
V: 1.1,
},
{
L: labels.FromStrings("__name__", "metric_created", "n", "1"),
V: 9999.999,
},
}
}
return []sample{{
L: labels.FromStrings("__name__", "metric_total", "n", "1"),
ST: 9999999, V: 1.1,
}}
}(),
},
{
title: "Two metrics and exemplars",
scrapeText: `metric_total{n="1"} 1 # {t="1"} 1.0 10000
@ -2970,7 +3024,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000
}},
},
{
title: "Native histogram with three exemplars from classic buckets",
title: "Native histogram with ST and three exemplars from classic buckets",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -2978,6 +3032,10 @@ help: "Test histogram with many buckets removed to keep it manageable in size."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
bucket: <
@ -3060,8 +3118,9 @@ metric: <
`,
contentType: "application/vnd.google.protobuf",
samples: []sample{{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3087,7 +3146,7 @@ metric: <
}},
},
{
title: "Native histogram with three exemplars scraped as classic histogram",
title: "Native histogram with ST and three exemplars scraped as classic histogram",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -3095,6 +3154,10 @@ help: "Test histogram with many buckets removed to keep it manageable in size."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
bucket: <
@ -3179,8 +3242,9 @@ metric: <
contentType: "application/vnd.google.protobuf",
samples: []sample{
{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3205,26 +3269,26 @@ metric: <
{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true},
},
},
{L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2},
{L: labels.FromStrings("__name__", "test_histogram_count"), ST: st, T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), ST: st, T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), ST: st, T: 1234568, V: 2},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), ST: st, T: 1234568, V: 4,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}},
},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), ST: st, T: 1234568, V: 16,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}},
},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), ST: st, T: 1234568, V: 32,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}},
},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), ST: st, T: 1234568, V: 175},
},
},
{
title: "Native histogram with exemplars and no classic buckets",
title: "Native histogram with ST, exemplars and no classic buckets",
contentType: "application/vnd.google.protobuf",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -3232,6 +3296,10 @@ help: "Test histogram."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
schema: 3
@ -3297,8 +3365,9 @@ metric: <
`,
samples: []sample{{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3324,7 +3393,7 @@ metric: <
}},
},
{
title: "Native histogram with exemplars but ingestion disabled",
title: "Native histogram with ST, exemplars but ingestion disabled",
contentType: "application/vnd.google.protobuf",
enableNativeHistogramsIngestion: false,
scrapeText: `name: "test_histogram"
@ -3332,6 +3401,10 @@ help: "Test histogram."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
schema: 3
@ -3397,9 +3470,9 @@ metric: <
`,
samples: []sample{
{L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_count"), ST: st, T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), ST: st, T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), ST: st, T: 1234568, V: 175},
},
},
} {
@ -3421,7 +3494,7 @@ metric: <
// This test does not care about metadata.
// Having this true would mean we need to add metadata to sample
// expectations.
// TODO(bwplotka): Add cases for append metadata to WAL and pass metadata
// TODO(bwplotka): Add cases for append metadata to WAL and pass metadata.
sl.appendMetadataToWAL = false
})
app := sl.appender()
@ -6786,3 +6859,100 @@ func TestScrapePoolSetScrapeFailureLoggerRace(t *testing.T) {
wg.Wait()
}
// TestScrapeOffsetDistribution verifies that targets in the same scrape pool
// are scraped at staggered offsets within the scrape interval rather than all
// at once. It runs under synctest for a deterministic virtual clock and uses
// an in-memory pipe listener so no real network is involved.
func TestScrapeOffsetDistribution(t *testing.T) {
	interval := 5 * time.Second

	synctest.Test(t, func(t *testing.T) {
		startTime := time.Now()
		listener := newPipeListener()

		// Record, per target path, the offsets (relative to test start) at
		// which each scrape request arrived.
		var mu sync.Mutex
		scrapeTimes := make(map[string][]time.Duration)
		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			select {
			case <-r.Context().Done():
				return
			default:
				mu.Lock()
				target := r.URL.Path
				scrapeTimes[target] = append(scrapeTimes[target], time.Since(startTime))
				mu.Unlock()
				w.Header().Set("Content-Type", "text/plain; version=0.0.4")
				fmt.Fprintln(w, "expected_metric 1")
			}
		})

		srv := httptest.NewUnstartedServer(handler)
		srv.Listener = listener
		srv.Start()
		t.Cleanup(srv.Close)

		app := teststorage.NewAppendable()
		opts := &Options{
			HTTPClientOptions: []config_util.HTTPClientOption{
				config_util.WithDialContextFunc(func(ctx context.Context, _, _ string) (net.Conn, error) {
					srvConn, cliConn := net.Pipe()
					select {
					case listener.conns <- srvConn:
						return cliConn, nil
					case <-listener.closed:
						return nil, net.ErrClosed
					case <-ctx.Done():
						return nil, ctx.Err()
					}
				}),
			},
		}

		scrapeManager, err := NewManager(opts, promslog.NewNopLogger(), nil, app, nil, prometheus.NewRegistry())
		// Check the error before touching the manager: on failure NewManager
		// returns a nil manager and the field write below would panic.
		require.NoError(t, err)
		scrapeManager.offsetSeed = 1 // Set a fixed offset seed for deterministic testing.

		var targets []model.LabelSet
		for i := range 5 {
			targets = append(targets, model.LabelSet{
				model.SchemeLabel:      "http",
				model.AddressLabel:     model.LabelValue(fmt.Sprintf("target-%d.local", i)),
				model.MetricsPathLabel: model.LabelValue(fmt.Sprintf("/metrics/%d", i)),
			})
		}
		scrapeManager.updateTsets(map[string][]*targetgroup.Group{
			"test": {{Targets: targets}},
		})

		cfg := &config.Config{
			GlobalConfig: config.GlobalConfig{
				ScrapeInterval:  model.Duration(interval),
				ScrapeTimeout:   model.Duration(interval),
				ScrapeProtocols: []config.ScrapeProtocol{config.PrometheusProto},
			},
			ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test"}},
		}
		// Round-trip the config through YAML so defaults are applied the same
		// way as for a configuration loaded from disk.
		cfgText, err := yaml.Marshal(*cfg)
		require.NoError(t, err)
		cfg = loadConfiguration(t, string(cfgText))
		require.NoError(t, scrapeManager.ApplyConfig(cfg))
		scrapeManager.reload()

		// Let several full scrape intervals elapse on the virtual clock.
		numScrapes := 4
		time.Sleep((time.Duration(numScrapes) * interval) + time.Second)
		synctest.Wait()
		scrapeManager.Stop()

		// For each scrape round, the recorded offsets should differ between
		// targets: more than two unique timestamps means the scrapes are
		// staggered rather than simultaneous.
		for i := range numScrapes {
			uniqueTimes := make(map[time.Duration]struct{})
			for _, times := range scrapeTimes {
				if i < len(times) {
					uniqueTimes[times[i]] = struct{}{}
				}
			}
			require.Greater(t, len(uniqueTimes), 2, "Expected targets to be scraped at staggered offsets rather than simultaneously at scrape index %d", i)
		}
	})
}

View File

@ -761,11 +761,12 @@ outer:
default:
}
if t.shards.enqueue(s.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
timestamp: s.T,
value: s.V,
sType: tSample,
seriesLabels: lbls,
metadata: meta,
startTimestamp: s.ST,
timestamp: s.T,
value: s.V,
sType: tSample,
}) {
continue outer
}
@ -883,9 +884,10 @@ outer:
if t.shards.enqueue(h.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
timestamp: h.T,
histogram: h.H,
sType: tHistogram,
// TODO(bwplotka): Populate ST once histogram Ref has it.
timestamp: h.T,
histogram: h.H,
sType: tHistogram,
}) {
continue outer
}
@ -942,8 +944,9 @@ outer:
default:
}
if t.shards.enqueue(h.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
seriesLabels: lbls,
metadata: meta,
// TODO(bwplotka): Populate ST once histogram Ref has it.
timestamp: h.T,
floatHistogram: h.FH,
sType: tFloatHistogram,
@ -1397,13 +1400,13 @@ type queue struct {
}
type timeSeries struct {
seriesLabels labels.Labels
value float64
histogram *histogram.Histogram
floatHistogram *histogram.FloatHistogram
metadata *metadata.Metadata
timestamp int64
exemplarLabels labels.Labels
seriesLabels labels.Labels
value float64
histogram *histogram.Histogram
floatHistogram *histogram.FloatHistogram
metadata *metadata.Metadata
startTimestamp, timestamp int64
exemplarLabels labels.Labels
// The type of series: sample, exemplar, or histogram.
sType seriesType
}
@ -1994,8 +1997,9 @@ func populateV2TimeSeries(symbolTable *writev2.SymbolsTable, batch []timeSeries,
switch d.sType {
case tSample:
pendingData[nPending].Samples = append(pendingData[nPending].Samples, writev2.Sample{
Value: d.value,
Timestamp: d.timestamp,
Value: d.value,
Timestamp: d.timestamp,
StartTimestamp: d.startTimestamp,
})
nPendingSamples++
case tExemplar:
@ -2006,9 +2010,11 @@ func populateV2TimeSeries(symbolTable *writev2.SymbolsTable, batch []timeSeries,
})
nPendingExemplars++
case tHistogram:
// TODO(bwplotka): Extend with ST once histograms populate it.
pendingData[nPending].Histograms = append(pendingData[nPending].Histograms, writev2.FromIntHistogram(d.timestamp, d.histogram))
nPendingHistograms++
case tFloatHistogram:
// TODO(bwplotka): Extend with ST once histograms populate it.
pendingData[nPending].Histograms = append(pendingData[nPending].Histograms, writev2.FromFloatHistogram(d.timestamp, d.floatHistogram))
nPendingHistograms++
case tMetadata:

View File

@ -143,7 +143,10 @@ func TestBasicContentNegotiation(t *testing.T) {
s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false)
defer s.Close()
recs := testwal.GenerateRecords(recCase{Series: 1, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: tc.senderProtoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 1, SamplesPerSeries: 1,
})
conf.RemoteWriteConfigs[0].ProtobufMessage = tc.senderProtoMsg
require.NoError(t, s.ApplyConfig(conf))
@ -225,6 +228,7 @@ func TestSampleDelivery(t *testing.T) {
s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false)
defer s.Close()
rc.NoST = protoMsg == remoteapi.WriteV1MessageType // RW1 does not support ST.
recs := testwal.GenerateRecords(rc)
var (
@ -388,7 +392,10 @@ func TestSampleDeliveryTimeout(t *testing.T) {
t.Parallel()
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 10, SamplesPerSeries: 10})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 10, SamplesPerSeries: 10,
})
cfg := testDefaultQueueConfig()
mcfg := config.DefaultMetadataConfig
cfg.MaxShards = 1
@ -417,7 +424,10 @@ func TestSampleDeliveryOrder(t *testing.T) {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
ts := 10
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
recs := testwal.GenerateRecords(recCase{Series: n, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: n, SamplesPerSeries: 1,
})
c, m := newTestClientAndQueueManager(t, defaultFlushDeadline, protoMsg)
c.expectSamples(recs.Samples, recs.Series)
@ -446,7 +456,10 @@ func TestShutdown(t *testing.T) {
m := newTestQueueManager(t, cfg, mcfg, deadline, c, protoMsg)
// Send 2x batch size, so we know it will need at least two sends.
n := 2 * config.DefaultQueueConfig.MaxSamplesPerSend
recs := testwal.GenerateRecords(recCase{Series: n / 1000, SamplesPerSeries: 1000})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: n / 1000, SamplesPerSeries: 1000,
})
m.StoreSeries(recs.Series, 0)
m.Start()
@ -515,7 +528,10 @@ func TestReshard(t *testing.T) {
size := 10 // Make bigger to find more races.
nSeries := 6
samplesPerSeries := config.DefaultQueueConfig.Capacity * size
recs := testwal.GenerateRecords(recCase{Series: nSeries, SamplesPerSeries: samplesPerSeries})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: nSeries, SamplesPerSeries: samplesPerSeries,
})
t.Logf("about to send %v samples", len(recs.Samples))
cfg := config.DefaultQueueConfig
@ -591,7 +607,10 @@ func TestReshardPartialBatch(t *testing.T) {
t.Parallel()
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 1, SamplesPerSeries: 10})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 1, SamplesPerSeries: 10,
})
c := NewTestBlockedWriteClient()
@ -636,7 +655,10 @@ func TestReshardPartialBatch(t *testing.T) {
func TestQueueFilledDeadlock(t *testing.T) {
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 50, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 50, SamplesPerSeries: 1,
})
c := NewNopWriteClient()
@ -835,8 +857,8 @@ func getSeriesIDFromRef(r record.RefSeries) string {
// TestWriteClient represents write client which does not call remote storage,
// but instead re-implements fake WriteHandler for test purposes.
type TestWriteClient struct {
receivedSamples map[string][]prompb.Sample
expectedSamples map[string][]prompb.Sample
receivedSamples map[string][]writev2.Sample
expectedSamples map[string][]writev2.Sample
receivedExemplars map[string][]prompb.Exemplar
expectedExemplars map[string][]prompb.Exemplar
receivedHistograms map[string][]prompb.Histogram
@ -860,8 +882,8 @@ type TestWriteClient struct {
// NewTestWriteClient creates a new testing write client.
func NewTestWriteClient(protoMsg remoteapi.WriteMessageType) *TestWriteClient {
return &TestWriteClient{
receivedSamples: map[string][]prompb.Sample{},
expectedSamples: map[string][]prompb.Sample{},
receivedSamples: map[string][]writev2.Sample{},
expectedSamples: map[string][]writev2.Sample{},
receivedMetadata: map[string][]prompb.MetricMetadata{},
expectedMetadata: map[string][]prompb.MetricMetadata{},
protoMsg: protoMsg,
@ -876,18 +898,20 @@ func (c *TestWriteClient) injectErrors(injectedErrs []error) {
c.retry = false
}
// expectSamples injects samples that will be expected on waitForExpectedData.
func (c *TestWriteClient) expectSamples(ss []record.RefSample, series []record.RefSeries) {
c.mtx.Lock()
defer c.mtx.Unlock()
c.expectedSamples = map[string][]prompb.Sample{}
c.receivedSamples = map[string][]prompb.Sample{}
c.expectedSamples = map[string][]writev2.Sample{}
c.receivedSamples = map[string][]writev2.Sample{}
for _, s := range ss {
tsID := getSeriesIDFromRef(series[s.Ref])
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], prompb.Sample{
Timestamp: s.T,
Value: s.V,
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], writev2.Sample{
StartTimestamp: s.ST,
Timestamp: s.T,
Value: s.V,
})
}
}
@ -1065,7 +1089,10 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
}
}
var reqProto *prompb.WriteRequest
var (
reqProto *prompb.WriteRequest
reqProtoV2 *writev2.Request
)
switch c.protoMsg {
case remoteapi.WriteV1MessageType:
reqProto = &prompb.WriteRequest{}
@ -1073,10 +1100,10 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
case remoteapi.WriteV2MessageType:
// NOTE(bwplotka): v1 msg can be unmarshaled to v2 sometimes, without
// errors.
var reqProtoV2 writev2.Request
err = proto.Unmarshal(reqBuf, &reqProtoV2)
reqProtoV2 = &writev2.Request{}
err = proto.Unmarshal(reqBuf, reqProtoV2)
if err == nil {
reqProto, err = v2RequestToWriteRequest(&reqProtoV2)
reqProto, err = v2RequestToWriteRequest(reqProtoV2)
}
}
if err != nil {
@ -1085,11 +1112,21 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
rs := WriteResponseStats{}
b := labels.NewScratchBuilder(0)
for _, ts := range reqProto.Timeseries {
for i, ts := range reqProto.Timeseries {
labels := ts.ToLabels(&b, nil)
tsID := labels.String()
if len(ts.Samples) > 0 {
c.receivedSamples[tsID] = append(c.receivedSamples[tsID], ts.Samples...)
for j, s := range ts.Samples {
st := int64(0)
if reqProtoV2 != nil {
// TODO(bwplotka): Refactor queue manager TestWriteClient for tighter validation
// and native support for new RW2 features. For now we inject STs in RW2 case to the existing test suite.
st = reqProtoV2.Timeseries[i].Samples[j].StartTimestamp
}
c.receivedSamples[tsID] = append(c.receivedSamples[tsID], writev2.Sample{
StartTimestamp: st,
Timestamp: s.Timestamp,
Value: s.Value,
})
}
rs.Samples += len(ts.Samples)
@ -1265,6 +1302,13 @@ var extraLabels []labels.Label = []labels.Label{
{Name: "pod_name", Value: "some-other-name-5j8s8"},
}
// Recommended CLI invocation(s):
/*
export bench=sampleSend && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkSampleSend' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkSampleSend(b *testing.B) {
// Send one sample per series, which is the typical remote_write case
const numSamples = 1
@ -1771,6 +1815,13 @@ func createDummyTimeSeries(instances int) []timeSeries {
return result
}
// Recommended CLI invocation(s):
/*
export bench=buildWriteRequest && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkBuildWriteRequest' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkBuildWriteRequest(b *testing.B) {
noopLogger := promslog.NewNopLogger()
bench := func(b *testing.B, batch []timeSeries) {
@ -1811,6 +1862,13 @@ func BenchmarkBuildWriteRequest(b *testing.B) {
})
}
// Recommended CLI invocation(s):
/*
export bench=buildV2WriteRequest && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkBuildV2WriteRequest' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkBuildV2WriteRequest(b *testing.B) {
noopLogger := promslog.NewNopLogger()
bench := func(b *testing.B, batch []timeSeries) {
@ -1860,7 +1918,9 @@ func TestDropOldTimeSeries(t *testing.T) {
size := 10
nSeries := 6
nSamples := config.DefaultQueueConfig.Capacity * size
noST := protoMsg == remoteapi.WriteV1MessageType // RW1
pastRecs := testwal.GenerateRecords(recCase{
NoST: noST,
Series: nSeries,
SamplesPerSeries: (nSamples / nSeries) / 2, // Half data is past.
TsFn: func(_, j int) int64 {
@ -1869,6 +1929,7 @@ func TestDropOldTimeSeries(t *testing.T) {
},
})
newRecs := testwal.GenerateRecords(recCase{
NoST: noST,
Series: nSeries,
SamplesPerSeries: (nSamples / nSeries) / 2, // Half data is past.
TsFn: func(_, j int) int64 {
@ -1943,6 +2004,7 @@ func TestSendSamplesWithBackoffWithSampleAgeLimit(t *testing.T) {
r := rand.New(rand.NewSource(99))
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: numberOfSeries,
SamplesPerSeries: 1,
TsFn: func(_, _ int) int64 {
@ -1967,9 +2029,10 @@ func TestSendSamplesWithBackoffWithSampleAgeLimit(t *testing.T) {
if !shouldBeDropped {
for _, s := range recs.Samples {
tsID := getSeriesIDFromRef(recs.Series[s.Ref])
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], prompb.Sample{
Timestamp: s.T,
Value: s.V,
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], writev2.Sample{
StartTimestamp: s.ST,
Timestamp: s.T,
Value: s.V,
})
}
}
@ -2490,7 +2553,10 @@ func TestHighestTimestampOnAppend(t *testing.T) {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
nSamples := 11 * config.DefaultQueueConfig.Capacity
nSeries := 3
recs := testwal.GenerateRecords(recCase{Series: nSeries, SamplesPerSeries: nSamples / nSeries})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: nSeries, SamplesPerSeries: nSamples / nSeries,
})
_, m := newTestClientAndQueueManager(t, defaultFlushDeadline, protoMsg)
m.Start()

View File

@ -341,11 +341,14 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
i := 0
seriesIter := s.Series.Iterator(nil)
lastType := chunkenc.ValNone
lastHadST := false
for typ := seriesIter.Next(); typ != chunkenc.ValNone; typ = seriesIter.Next() {
if typ != lastType || i >= seriesToChunkEncoderSplit {
st := seriesIter.AtST()
hasST := st != 0
if typ != lastType || lastHadST != hasST || i >= seriesToChunkEncoderSplit {
// Create a new chunk if the sample type changed or too many samples in the current one.
chks = appendChunk(chks, mint, maxt, chk)
chk, err = chunkenc.NewEmptyChunk(typ.ChunkEncoding())
chk, err = typ.NewChunk(hasST)
if err != nil {
return errChunksIterator{err: err}
}
@ -358,21 +361,20 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
i = 0
}
lastType = typ
lastHadST = hasST
var (
st, t int64
v float64
h *histogram.Histogram
fh *histogram.FloatHistogram
t int64
v float64
h *histogram.Histogram
fh *histogram.FloatHistogram
)
switch typ {
case chunkenc.ValFloat:
t, v = seriesIter.At()
st = seriesIter.AtST()
app.Append(st, t, v)
case chunkenc.ValHistogram:
t, h = seriesIter.AtHistogram(nil)
st = seriesIter.AtST()
newChk, recoded, app, err = app.AppendHistogram(nil, st, t, h, false)
if err != nil {
return errChunksIterator{err: err}
@ -388,7 +390,6 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
}
case chunkenc.ValFloatHistogram:
t, fh = seriesIter.AtFloatHistogram(nil)
st = seriesIter.AtST()
newChk, recoded, app, err = app.AppendFloatHistogram(nil, st, t, fh, false)
if err != nil {
return errChunksIterator{err: err}

View File

@ -95,7 +95,9 @@ type Options struct {
// EnableSTStorage determines whether agent DB should write a Start Timestamp (ST)
// per sample to WAL.
// TODO(bwplotka): Implement this option as per PROM-60, currently it's noop.
// Controlled by the `--enable-feature=st-storage` CLI flag; when enabled, ST is
// persisted to the WAL for samples that include a non-zero start timestamp in
// supported record types.
EnableSTStorage bool
}
@ -490,7 +492,7 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
return
}
decoded <- series
case record.Samples:
case record.Samples, record.SamplesV2:
samples := db.walReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(rec, samples)
if err != nil {
@ -750,7 +752,7 @@ func (db *DB) truncate(mint int64) error {
db.metrics.checkpointCreationTotal.Inc()
if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint); err != nil {
if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint, db.opts.EnableSTStorage); err != nil {
db.metrics.checkpointCreationFail.Inc()
var cerr *wlog.CorruptionErr
if errors.As(err, &cerr) {
@ -1196,7 +1198,7 @@ func (a *appenderBase) log() error {
a.mtx.RLock()
defer a.mtx.RUnlock()
var encoder record.Encoder
encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage}
buf := a.bufPool.Get().([]byte)
defer func() {
a.bufPool.Put(buf) //nolint:staticcheck
@ -1320,7 +1322,7 @@ func (a *appenderBase) logSeries() error {
a.bufPool.Put(buf) //nolint:staticcheck
}()
var encoder record.Encoder
encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage}
buf = encoder.Series(a.pendingSeries, buf)
if err := a.wal.Log(buf); err != nil {
return err

View File

@ -72,7 +72,6 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
lastTS := s.lastTs
s.Unlock()
// TODO(bwplotka): Handle ST natively (as per PROM-60).
if a.opts.EnableSTAsZeroSample && st != 0 {
a.bestEffortAppendSTZeroSample(s, ls, lastTS, st, t, h, fh)
}
@ -86,6 +85,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
case fh != nil:
isStale = value.IsStaleNaN(fh.Sum)
// NOTE: always modify pendingFloatHistograms and floatHistogramSeries together
// TODO(krajorama,ywwg,bwplotka): Pass ST when available in WAL.
a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{
Ref: s.ref,
T: t,
@ -95,6 +95,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
case h != nil:
isStale = value.IsStaleNaN(h.Sum)
// NOTE: always modify pendingHistograms and histogramSeries together
// TODO(krajorama,ywwg,bwplotka): Pass ST when available in WAL.
a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{
Ref: s.ref,
T: t,
@ -107,6 +108,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
// NOTE: always modify pendingSamples and sampleSeries together.
a.pendingSamples = append(a.pendingSamples, record.RefSample{
Ref: s.ref,
ST: st,
T: t,
V: v,
})

View File

@ -18,6 +18,7 @@ import (
"fmt"
"math"
"path/filepath"
"strconv"
"testing"
"time"
@ -89,278 +90,301 @@ func TestDB_InvalidSeries_AppendV2(t *testing.T) {
})
}
// TestCommit_AppendV2 tests Appender commit.
// TODO(bwplotka): Rewrite this so Refs are generated, then appended, then expected so we test the
// exact data durability.
func TestCommit_AppendV2(t *testing.T) {
const (
numDatapoints = 1000
numHistograms = 100
numSeries = 8
)
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.EnableSTStorage = enableSTStorage
s := createTestAgentDB(t, nil, opts)
s := createTestAgentDB(t, nil, DefaultOptions())
app := s.AppenderV2(context.TODO())
var (
expectedSampleSTs []int64
gotSampleSTs []int64
)
if enableSTStorage {
expectedSampleSTs = make([]int64, 0, numSeries*numDatapoints)
gotSampleSTs = make([]int64, 0, numSeries*numDatapoints)
}
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
app := s.AppenderV2(t.Context())
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{
Exemplars: []exemplar.Exemplar{{
Labels: lset,
Ts: sample[0].T() + int64(i),
Value: sample[0].F(),
HasTs: true,
}},
})
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
st := int64(i + 1234)
_, err := app.Append(0, lset, st, sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{
Exemplars: []exemplar.Exemplar{{
Labels: lset,
Ts: sample[0].T() + int64(i) + 2000,
Value: sample[0].F(),
HasTs: true,
}},
})
require.NoError(t, err)
if enableSTStorage {
expectedSampleSTs = append(expectedSampleSTs, st)
}
}
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+2234), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+3234), int64(i+2000), 0, customBucketHistograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+4234), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+5234), int64(i+2000), 0, nil, customBucketFloatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
}
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series, samples, and exemplars.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
if enableSTStorage {
t.Errorf("Got V1 Samples when ST enabled")
}
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.SamplesV2:
if !enableSTStorage {
t.Errorf("Got V2 Samples when ST disabled")
}
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
for _, s := range samples {
gotSampleSTs = append(gotSampleSTs, s.ST)
}
walSamplesCount += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
default:
}
}
// Check that the WAL contained the same number of committed series/samples/exemplars.
require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series")
require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples")
require.Equal(t, expectedSampleSTs, gotSampleSTs, "unexpected STs received")
require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars")
require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms")
require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms")
// Check that we can still create both kinds of Appender.
// Regression test against https://github.com/prometheus/prometheus/issues/17800.
_ = s.Appender(t.Context())
_ = s.AppenderV2(t.Context())
})
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, customBucketHistograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, customBucketFloatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series, samples, and exemplars.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
default:
}
}
// Check that the WAL contained the same number of committed series/samples/exemplars.
require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series")
require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples")
require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars")
require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms")
require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms")
// Check that we can still create both kinds of Appender - see https://github.com/prometheus/prometheus/issues/17800.
_ = s.Appender(context.TODO())
_ = s.AppenderV2(context.TODO())
}
func TestRollback_AppendV2(t *testing.T) {
func TestRollbackAppendV2(t *testing.T) {
const (
numDatapoints = 1000
numHistograms = 100
numSeries = 8
)
s := createTestAgentDB(t, nil, DefaultOptions())
app := s.AppenderV2(context.TODO())
for _, enableSTStorage := range []bool{false, true} {
opts := DefaultOptions()
opts.EnableSTStorage = enableSTStorage
s := createTestAgentDB(t, nil, opts)
app := s.AppenderV2(context.TODO())
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
for range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{})
require.NoError(t, err)
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, int64(i), sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
}
// Do a rollback, which should clear uncommitted data. A followup call to
// commit should persist nothing to the WAL.
require.NoError(t, app.Rollback())
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
// Do a rollback, which should clear uncommitted data. A followup call to
// commit should persist nothing to the WAL.
require.NoError(t, app.Rollback())
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series and samples.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
// Read records from WAL and check for expected count of series and samples.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walHistogramCount, walFloatHistogramCount, walExemplarsCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
walSeriesCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.Samples, record.SamplesV2:
t.Errorf("should not have found samples")
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
case record.Exemplars:
t.Errorf("should not have found exemplars")
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.HistogramSamples, record.CustomBucketsHistogramSamples, record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
t.Errorf("should not have found histograms")
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
default:
default:
}
}
}
// Check that only series get stored after calling Rollback.
require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL")
require.Equal(t, 0, walSamplesCount, "samples should not have been written to WAL")
require.Equal(t, 0, walExemplarsCount, "exemplars should not have been written to WAL")
require.Equal(t, 0, walHistogramCount, "histograms should not have been written to WAL")
require.Equal(t, 0, walFloatHistogramCount, "float histograms should not have been written to WAL")
// Check that only series get stored after calling Rollback.
require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL")
}
}
func TestFullTruncateWAL_AppendV2(t *testing.T) {

View File

@ -226,7 +226,7 @@ func TestCommit(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -362,7 +362,7 @@ func TestRollback(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -1425,7 +1425,7 @@ func readWALSamples(t *testing.T, walDir string) []walSample {
series, err := dec.Series(rec, nil)
require.NoError(t, err)
lastSeries = series[0]
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err = dec.Samples(rec, samples[:0])
require.NoError(t, err)
for _, s := range samples {

View File

@ -0,0 +1,343 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"errors"
"fmt"
"io"
"math"
"math/rand"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/timestamp"
)
// sampleCase is a named, pre-generated series of (st, t, v) samples shared by
// the chunk encoding benchmarks so every format sees identical input.
type sampleCase struct {
	name    string
	samples []triple
}
// fmtCase describes one chunk encoding under benchmark.
type fmtCase struct {
	name       string
	newChunkFn func() Chunk
	// stUnsupported marks formats that cannot store start timestamps;
	// iterated STs are expected to decode as 0 for such formats.
	stUnsupported bool
}
// foreachFmtSampleCase invokes fn once per (chunk format, sample pattern)
// combination. Sample patterns are the cross product of timestamp, value and
// start-timestamp (ST) generators, all driven by a fixed-seed RNG so every
// format is benchmarked against identical, deterministic data.
func foreachFmtSampleCase(b *testing.B, fn func(b *testing.B, f fmtCase, s sampleCase)) {
	const nSamples = 120 // Same as tsdb.DefaultSamplesPerChunk.

	d, err := time.Parse(time.DateTime, "2025-11-04 10:01:05")
	require.NoError(b, err)
	var (
		r       = rand.New(rand.NewSource(1)) // Fixed seed for reproducibility.
		initST  = timestamp.FromTime(d)       // Use realistic timestamp.
		initT   = initST + 15000              // 15s after initST.
		initV   = 1243535.123
		rInts   = make([]int64, 2*nSamples) // Random ints for timestamps and STs.
		rFloats = make([]float64, nSamples)
	)
	// Pre-generate random numbers so that adding/removing cases does not change
	// the generated samples.
	for i := range nSamples {
		rInts[i] = int64(r.Intn(100))
		rInts[nSamples+i] = int64(r.Intn(100))
		rFloats[i] = float64(r.Intn(100))
	}

	// tPatterns control how the regular timestamp advances.
	type tPattern struct {
		name string
		next func(t int64, i int) int64
	}
	// vPatterns control how the value advances.
	type vPattern struct {
		name string
		next func(v float64, i int) float64
	}
	// stPatterns compute the start timestamp from the previous t (before the
	// step), the new t (after the step), and the sample index.
	type stPattern struct {
		name    string
		compute func(prevT, newT int64, i int) int64
	}

	tPatterns := []tPattern{
		{
			name: "t=constant",
			next: func(t int64, _ int) int64 { return t + 15000 },
		},
		{
			// 15 seconds ± up to 100ms of jitter.
			name: "t=jitter",
			next: func(t int64, i int) int64 { return t + rInts[i] - 50 + 15000 },
		},
		{
			// First 10 samples at constant 60s, then one 10-interval gap (600s),
			// then 60s ± 30ms jitter. The gap triggers XOR18111 full mode via
			// multiplier encoding (dod=540000 = 9×60000). Subsequent small-jitter
			// delta-of-deltas (≤30ms) use XOR18111's 7-bit full-mode code (9 bits
			// total) vs XOR compact's minimum 14-bit code (16 bits total).
			name: "t=gap-jitter",
			next: func(t int64, i int) int64 {
				if i < 10 {
					return t + 60000
				}
				if i == 10 {
					return t + 10*60000 // 10-interval gap; triggers XOR18111 full mode.
				}
				return t + 60000 + rInts[i]%61 - 30 // 60s ± 30ms jitter.
			},
		},
	}
	vPatterns := []vPattern{
		{
			name: "v=constant",
			next: func(v float64, _ int) float64 { return v },
		},
		// We are not interested in float compression we're not changing it.
		// {
		// 	// Varying from -50 to +50 in 100 discrete steps.
		// 	name: "v=rand-steps",
		// 	next: func(v float64, i int) float64 { return v + rFloats[i] - 50 },
		// },
		// {
		// 	// Random increment between 0 and 1.0.
		// 	name: "v=rand0-1",
		// 	next: func(v float64, i int) float64 { return v + rFloats[i]/100.0 },
		// },
		// {
		// 	// Random decrement between 0 and -1.0. Tests negative varint encoding;
		// 	// see https://victoriametrics.com/blog/go-protobuf/.
		// 	name: "v=nrand0-1",
		// 	next: func(v float64, i int) float64 { return v - rFloats[i]/100.0 },
		// },
	}
	stPatterns := []stPattern{
		{
			name:    "st=0",
			compute: func(_, _ int64, _ int) int64 { return 0 },
		},
		{
			// Constant ST throughout the chunk, typical for long-running counters.
			name:    "st=cumulative",
			compute: func(_, _ int64, _ int) int64 { return initST },
		},
		{
			// ST is just after the previous sample's t: tight delta interval.
			name:    "st=delta-excl",
			compute: func(prevT, _ int64, _ int) int64 { return prevT + 1 },
		},
		{
			// ST equals the previous sample's t: inclusive delta interval.
			name:    "st=delta-incl",
			compute: func(prevT, _ int64, _ int) int64 { return prevT },
		},
		{
			// ST equals the current sample's t.
			name:    "st=t",
			compute: func(_, newT int64, _ int) int64 { return newT },
		},
		{
			// ST is equal to the previous t plus up to 100ms of jitter.
			name:    "st=delta-jitter",
			compute: func(prevT, _ int64, i int) int64 { return prevT + rInts[nSamples+i] },
		},
		{
			// Cumulative ST with periodic resets 10s before the current t.
			name: "st=cum-resets",
			compute: func(_, newT int64, i int) int64 {
				if i%6 == 5 {
					return newT - 10000
				}
				return initST
			},
		},
		{
			// Cumulative ST with periodic zero resets.
			name: "st=cum-zeros",
			compute: func(_, _ int64, i int) int64 {
				if i%6 == 5 {
					return 0
				}
				return initST
			},
		},
	}

	// Materialize the full cross product of patterns into concrete samples.
	var sampleCases []sampleCase
	for _, tp := range tPatterns {
		for _, vp := range vPatterns {
			for _, sp := range stPatterns {
				samples := make([]triple, 0, nSamples)
				t, v := initT, initV
				for i := range nSamples {
					prevT := t
					t = tp.next(t, i)
					v = vp.next(v, i)
					st := sp.compute(prevT, t, i)
					samples = append(samples, triple{st: st, t: t, v: v})
				}
				sampleCases = append(sampleCases, sampleCase{
					name:    tp.name + "/" + vp.name + "/" + sp.name,
					samples: samples,
				})
			}
		}
	}

	// Run fn as a sub-benchmark for every (format, sample case) pair.
	for _, f := range []fmtCase{
		{name: "XOR", newChunkFn: func() Chunk { return NewXORChunk() }, stUnsupported: true},
		{name: "XOR2", newChunkFn: func() Chunk { return NewXOR2Chunk() }},
	} {
		for _, s := range sampleCases {
			b.Run(fmt.Sprintf("fmt=%s/%s", f.name, s.name), func(b *testing.B) {
				fn(b, f, s)
			})
		}
	}
}
/*
export bench=bw.bench/append.v2 && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/appendprof && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
*/
// BenchmarkAppender measures encoding speed, allocations, and the resulting
// encoded chunk size (reported as "B/chunk") for every chunk format and
// sample pattern produced by foreachFmtSampleCase.
func BenchmarkAppender(b *testing.B) {
	foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
		b.ReportAllocs()
		for b.Loop() {
			chunk := f.newChunkFn()
			app, err := chunk.Appender()
			if err != nil {
				b.Fatalf("get appender: %s", err)
			}
			for i := range s.samples {
				smpl := s.samples[i]
				app.Append(smpl.st, smpl.t, smpl.v)
			}
			// NOTE: Some buffered implementations only encode on Bytes().
			b.ReportMetric(float64(len(chunk.Bytes())), "B/chunk")
			require.Equal(b, len(s.samples), chunk.NumSamples())
		}
	})
}
/*
export bench=bw.bench/iter && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-memprofile=${bench}.mem.pprof \
| tee ${bench}.txt
*/
// BenchmarkIterator measures decoding speed per chunk format and sample
// pattern. Before the timed loop it round-trips the samples once to verify
// that encode/decode is lossless for the format under test.
func BenchmarkIterator(b *testing.B) {
	foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
		floatEquals := func(a, b float64) bool {
			return a == b
		}
		if f.name == "ALPBuffered" {
			// Hack as ALP loses precision.
			// NOTE(review): no "ALPBuffered" fmtCase is registered in
			// foreachFmtSampleCase, so this branch is currently dead code —
			// presumably kept for local experiments; confirm.
			floatEquals = func(a, b float64) bool {
				return math.Abs(a-b) < 1e-6
			}
		}
		b.ReportAllocs()

		// Build the chunk that the timed loop will decode.
		c := f.newChunkFn()
		a, err := c.Appender()
		if err != nil {
			b.Fatalf("get appender: %s", err)
		}
		for _, p := range s.samples {
			a.Append(p.st, p.t, p.v)
		}
		// Some chunk implementations might be buffered. Reset to ensure we don't reuse
		// appending buffers.
		c.Reset(c.Bytes())

		// While we are at it, test if encoding/decoding works.
		it := c.Iterator(nil)
		require.Equal(b, len(s.samples), c.NumSamples())
		var got []triple
		for i := 0; it.Next() == ValFloat; i++ {
			t, v := it.At()
			got = append(got, triple{st: it.AtST(), t: t, v: v})
		}
		if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
			require.NoError(b, err)
		}
		expectedSamples := s.samples
		if f.stUnsupported {
			// If the format does not support ST, zero them out for comparison.
			expectedSamples = make([]triple, len(s.samples))
			copy(expectedSamples, s.samples)
			for i := range s.samples {
				expectedSamples[i].st = 0
			}
		}
		if diff := cmp.Diff(expectedSamples, got, cmp.AllowUnexported(triple{}), cmp.Comparer(floatEquals)); diff != "" {
			b.Fatalf("mismatch (-want +got):\n%s", diff)
		}

		var sink float64
		// Measure decoding efficiency.
		for i := 0; b.Loop(); {
			// Some chunk implementations might be buffered. Reset to ensure we don't reuse
			// previous decoded data.
			c.Reset(c.Bytes())
			b.ReportMetric(float64(len(c.Bytes())), "B/chunk")
			it := c.Iterator(it)
			for it.Next() == ValFloat {
				_, v := it.At()
				sink = v
				i++
			}
			if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
				require.NoError(b, err)
			}
			_ = sink
		}
	})
}

View File

@ -101,6 +101,7 @@ func (b *bstream) writeByte(byt byte) {
// writeBits writes the nbits right-most bits of u to the stream
// in left-to-right order.
// TODO: Once XOR2 stabilizes, replace writeBits with the writeBitsFast implementation and remove writeBitsFast.
func (b *bstream) writeBits(u uint64, nbits int) {
u <<= 64 - uint(nbits)
for nbits >= 8 {
@ -117,6 +118,40 @@ func (b *bstream) writeBits(u uint64, nbits int) {
}
}
// writeBitsFast is like writeBits but handles the partial last byte inline to
// avoid per-byte writeByte calls, and writes complete bytes directly to the
// stream slice.
func (b *bstream) writeBitsFast(u uint64, nbits int) {
	// Left-align the payload so the nbits to write are the most significant
	// bits of u.
	u <<= 64 - uint(nbits)

	// If the last byte is partial, fill its remaining bits first.
	// NOTE(review): this treats b.count as the number of free (writable) bits
	// in the last stream byte — confirm against the bstream field docs.
	if b.count > 0 {
		free := int(b.count)
		last := len(b.stream) - 1
		b.stream[last] |= byte(u >> uint(64-free))
		if nbits < free {
			// Everything fit into the partial byte and free bits remain.
			b.count = uint8(free - nbits)
			return
		}
		// The partial byte is now full; drop the bits we just wrote.
		u <<= uint(free)
		nbits -= free
		b.count = 0
	}

	// Write complete bytes directly, avoiding per-byte function call overhead.
	for nbits >= 8 {
		b.stream = append(b.stream, byte(u>>56))
		u <<= 8
		nbits -= 8
	}

	// Write any remaining bits as a partial final byte.
	if nbits > 0 {
		b.stream = append(b.stream, byte(u>>56))
		b.count = uint8(8 - nbits)
	}
}
type bstreamReader struct {
stream []byte
streamOffset int // The offset from which read the next byte from the stream.
@ -215,6 +250,156 @@ func (b *bstreamReader) ReadByte() (byte, error) {
return byte(v), nil
}
// readXOR2ControlFast is the inlinable fast path of readXOR2Control. It
// reports false when fewer than 4 valid bits are buffered, or when the
// control prefix is '1111' (cases 4 and 5); the caller then retries with
// readXOR2Control. Keep this function small and leaf-only so the compiler
// can inline it.
func (b *bstreamReader) readXOR2ControlFast() (uint8, bool) {
	if b.valid < 4 {
		return 0, false
	}
	prefix := uint8((b.buffer >> (b.valid - 4)) & 0xf)
	switch {
	case prefix&0x8 == 0: // '0xxx': dod=0, val=0 (case 0).
		b.valid--
		return 0, true
	case prefix&0x4 == 0: // '10xx': dod=0, val changed (case 1).
		b.valid -= 2
		return 1, true
	case prefix&0x2 == 0: // '110x': small dod (case 2).
		b.valid -= 3
		return 2, true
	case prefix&0x1 == 0: // '1110': medium dod (case 3).
		b.valid -= 4
		return 3, true
	}
	// '1111' needs a fifth bit: defer to the slow path.
	return 0, false
}
// readXOR2Control reads the XOR2 variable-length joint control prefix
// and returns 0-5 mapping to the six encoding cases:
//
//	0 → '0'     dod=0, val=0           (1 bit consumed)
//	1 → '10'    dod=0, val≠0           (2 bits consumed)
//	2 → '110'   dod≠0, 13-bit signed dod (3 bits consumed)
//	3 → '1110'  dod≠0, 20-bit signed dod (4 bits consumed)
//	4 → '11110' dod≠0, 64-bit escape   (5 bits consumed)
//	5 → '11111' dod=0, stale NaN       (5 bits consumed)
//
// The fast path peeks at 4 bits from the internal buffer; for the '1111'
// prefix a fifth bit is read to distinguish cases 4 and 5.
func (b *bstreamReader) readXOR2Control() (uint8, error) {
	if b.valid >= 4 {
		// Fast path: all four prefix bits are buffered, so classify with one
		// peek and subtract only the bits the matched prefix consumes.
		top4 := uint8((b.buffer >> (b.valid - 4)) & 0xf)
		if top4 < 8 { // '0xxx' → case 0.
			b.valid--
			return 0, nil
		}
		if top4 < 12 { // '10xx' → case 1.
			b.valid -= 2
			return 1, nil
		}
		if top4 < 14 { // '110x' → case 2.
			b.valid -= 3
			return 2, nil
		}
		if top4 == 14 { // '1110' → case 3.
			b.valid -= 4
			return 3, nil
		}
		// '1111': need fifth bit to distinguish cases 4 and 5.
		if b.valid >= 5 {
			bit4 := uint8((b.buffer >> (b.valid - 5)) & 1)
			b.valid -= 5
			return 4 + bit4, nil
		}
		// Fifth bit spans a buffer boundary; consume the four known bits
		// and read the fifth from the stream.
		b.valid -= 4
		bit4, err := b.readBit()
		if err != nil {
			return 0, err
		}
		if bit4 == zero {
			return 4, nil
		}
		return 5, nil
	}

	// Slow path: bits may span buffer boundaries, read one at a time.
	bit0, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit0 == zero {
		return 0, nil
	}
	bit1, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit1 == zero {
		return 1, nil
	}
	bit2, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit2 == zero {
		return 2, nil
	}
	bit3, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit3 == zero {
		return 3, nil
	}
	bit4, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit4 == zero {
		return 4, nil
	}
	return 5, nil
}
// readUvarint decodes a varint-encoded uint64 using direct method calls,
// avoiding the io.ByteReader interface dispatch used by binary.ReadUvarint,
// which causes the receiver to escape to the heap. At most
// binary.MaxVarintLen64 bytes are consumed; if the varint is still not
// terminated by then, io.ErrUnexpectedEOF is returned.
func (b *bstreamReader) readUvarint() (uint64, error) {
	var result uint64
	for shift := uint(0); shift < uint(binary.MaxVarintLen64)*7; shift += 7 {
		c, err := b.ReadByte()
		if err != nil {
			return result, err
		}
		if c&0x80 == 0 {
			// Continuation bit clear: this is the final byte.
			return result | uint64(c)<<shift, nil
		}
		result |= uint64(c&0x7f) << shift
	}
	return result, io.ErrUnexpectedEOF
}
// readVarint decodes a varint-encoded int64 using direct method calls,
// avoiding the io.ByteReader interface dispatch used by binary.ReadVarint,
// which causes the receiver to escape to the heap.
func (b *bstreamReader) readVarint() (int64, error) {
	u, err := b.readUvarint()
	// Branchless zigzag decode: even u → u/2, odd u → ^(u/2).
	return int64(u>>1) ^ -int64(u&1), err
}
// loadNextBuffer loads the next bytes from the stream into the internal buffer.
// The input nbits is the minimum number of bits that must be read, but the implementation
// can read more (if possible) to improve performances.

View File

@ -14,6 +14,7 @@
package chunkenc
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
@ -32,6 +33,44 @@ func TestBstream_Reset(t *testing.T) {
}, bs)
}
// BenchmarkWriteBits benchmarks writeBits for various bit widths.
func BenchmarkWriteBits(b *testing.B) {
	for _, nbits := range []int{1, 8, 17, 32, 52, 64} {
		b.Run(fmt.Sprintf("nbits=%d", nbits), func(b *testing.B) {
			b.ReportAllocs()
			// Allocate the backing buffer once; each iteration reuses it.
			bs := bstream{stream: make([]byte, 0, 1024)}
			for range b.N {
				bs.stream = bs.stream[:0]
				bs.count = 0
				for j := range 100 {
					bs.writeBits(uint64(j), nbits)
				}
			}
		})
	}
}
// BenchmarkWriteBitsFast benchmarks writeBitsFast for various bit widths.
func BenchmarkWriteBitsFast(b *testing.B) {
	for _, nbits := range []int{1, 8, 17, 32, 52, 64} {
		b.Run(fmt.Sprintf("nbits=%d", nbits), func(b *testing.B) {
			b.ReportAllocs()
			// Allocate the backing buffer once; each iteration reuses it.
			bs := bstream{stream: make([]byte, 0, 1024)}
			for range b.N {
				bs.stream = bs.stream[:0]
				bs.count = 0
				for j := range 100 {
					bs.writeBitsFast(uint64(j), nbits)
				}
			}
		})
	}
}
func TestBstreamReader(t *testing.T) {
// Write to the bit stream.
w := bstream{}

View File

@ -30,6 +30,7 @@ const (
EncXOR
EncHistogram
EncFloatHistogram
EncXOR2
)
func (e Encoding) String() string {
@ -42,13 +43,15 @@ func (e Encoding) String() string {
return "histogram"
case EncFloatHistogram:
return "floathistogram"
case EncXOR2:
return "XOR2"
}
return "<unknown>"
}
// IsValidEncoding returns true for supported encodings.
func IsValidEncoding(e Encoding) bool {
return e == EncXOR || e == EncHistogram || e == EncFloatHistogram
return e == EncXOR || e == EncHistogram || e == EncFloatHistogram || e == EncXOR2
}
const (
@ -73,6 +76,8 @@ type Chunk interface {
Bytes() []byte
// Encoding returns the encoding type of the chunk.
// If the chunk is capable of storing ST (start timestamps), it should
// return the appropriate encoding type (e.g., EncXOR2).
Encoding() Encoding
// Appender returns an appender to append samples to the chunk.
@ -186,9 +191,12 @@ func (v ValueType) String() string {
}
}
func (v ValueType) ChunkEncoding() Encoding {
func (v ValueType) ChunkEncoding(useXOR2 bool) Encoding {
switch v {
case ValFloat:
if useXOR2 {
return EncXOR2
}
return EncXOR
case ValHistogram:
return EncHistogram
@ -199,17 +207,9 @@ func (v ValueType) ChunkEncoding() Encoding {
}
}
func (v ValueType) NewChunk() (Chunk, error) {
switch v {
case ValFloat:
return NewXORChunk(), nil
case ValHistogram:
return NewHistogramChunk(), nil
case ValFloatHistogram:
return NewFloatHistogramChunk(), nil
default:
return nil, fmt.Errorf("value type %v unsupported", v)
}
// NewChunk returns a new empty chunk for the given value type.
func (v ValueType) NewChunk(useXOR2 bool) (Chunk, error) {
return NewEmptyChunk(v.ChunkEncoding(useXOR2))
}
// MockSeriesIterator returns an iterator for a mock series with custom
@ -299,6 +299,7 @@ type pool struct {
xor sync.Pool
histogram sync.Pool
floatHistogram sync.Pool
xo2 sync.Pool
}
// NewPool returns a new pool.
@ -319,6 +320,11 @@ func NewPool() Pool {
return &FloatHistogramChunk{b: bstream{}}
},
},
xo2: sync.Pool{
New: func() any {
return &XOR2Chunk{b: bstream{}}
},
},
}
}
@ -331,6 +337,8 @@ func (p *pool) Get(e Encoding, b []byte) (Chunk, error) {
c = p.histogram.Get().(*HistogramChunk)
case EncFloatHistogram:
c = p.floatHistogram.Get().(*FloatHistogramChunk)
case EncXOR2:
c = p.xo2.Get().(*XOR2Chunk)
default:
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}
@ -352,6 +360,9 @@ func (p *pool) Put(c Chunk) error {
case EncFloatHistogram:
_, ok = c.(*FloatHistogramChunk)
sp = &p.floatHistogram
case EncXOR2:
_, ok = c.(*XOR2Chunk)
sp = &p.xo2
default:
return fmt.Errorf("invalid chunk encoding %q", c.Encoding())
}
@ -378,6 +389,8 @@ func FromData(e Encoding, d []byte) (Chunk, error) {
return &HistogramChunk{b: bstream{count: 0, stream: d}}, nil
case EncFloatHistogram:
return &FloatHistogramChunk{b: bstream{count: 0, stream: d}}, nil
case EncXOR2:
return &XOR2Chunk{b: bstream{count: 0, stream: d}}, nil
}
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}
@ -391,6 +404,8 @@ func NewEmptyChunk(e Encoding) (Chunk, error) {
return NewHistogramChunk(), nil
case EncFloatHistogram:
return NewFloatHistogramChunk(), nil
case EncXOR2:
return NewXOR2Chunk(), nil
}
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}

View File

@ -16,36 +16,41 @@ package chunkenc
import (
"errors"
"fmt"
"io"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
)
type pair struct {
t int64
v float64
type triple struct {
st, t int64
v float64
}
func TestChunk(t *testing.T) {
for enc, nc := range map[Encoding]func() Chunk{
EncXOR: func() Chunk { return NewXORChunk() },
} {
t.Run(fmt.Sprintf("%v", enc), func(t *testing.T) {
testcases := []struct {
encoding Encoding
supportsST bool
factory func() Chunk
}{
{encoding: EncXOR, supportsST: false, factory: func() Chunk { return NewXORChunk() }},
{encoding: EncXOR2, supportsST: true, factory: func() Chunk { return NewXOR2Chunk() }},
}
for _, tc := range testcases {
t.Run(fmt.Sprintf("%v", tc.encoding), func(t *testing.T) {
for range make([]struct{}, 1) {
c := nc()
testChunk(t, c)
c := tc.factory()
testChunk(t, c, tc.supportsST)
}
})
}
}
func testChunk(t *testing.T, c Chunk) {
func testChunk(t *testing.T, c Chunk, supportsST bool) {
app, err := c.Appender()
require.NoError(t, err)
var exp []pair
var exp []triple
var (
ts = int64(1234123324)
v = 1243535.123
@ -65,26 +70,30 @@ func testChunk(t *testing.T, c Chunk) {
require.NoError(t, err)
}
app.Append(0, ts, v)
exp = append(exp, pair{t: ts, v: v})
app.Append(ts-100, ts, v)
expST := int64(0)
if supportsST {
expST = ts - 100
}
exp = append(exp, triple{st: expST, t: ts, v: v})
}
// 1. Expand iterator in simple case.
it1 := c.Iterator(nil)
var res1 []pair
var res1 []triple
for it1.Next() == ValFloat {
ts, v := it1.At()
res1 = append(res1, pair{t: ts, v: v})
res1 = append(res1, triple{st: it1.AtST(), t: ts, v: v})
}
require.NoError(t, it1.Err())
require.Equal(t, exp, res1)
// 2. Expand second iterator while reusing first one.
it2 := c.Iterator(it1)
var res2 []pair
var res2 []triple
for it2.Next() == ValFloat {
ts, v := it2.At()
res2 = append(res2, pair{t: ts, v: v})
res2 = append(res2, triple{st: it2.AtST(), t: ts, v: v})
}
require.NoError(t, it2.Err())
require.Equal(t, exp, res2)
@ -93,18 +102,22 @@ func testChunk(t *testing.T, c Chunk) {
mid := len(exp) / 2
it3 := c.Iterator(nil)
var res3 []pair
var res3 []triple
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
// Below ones should not matter.
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
ts, v = it3.At()
res3 = append(res3, pair{t: ts, v: v})
res3 = append(res3, triple{st: it3.AtST(), t: ts, v: v})
lastTs := ts
for it3.Next() == ValFloat {
ts, v := it3.At()
res3 = append(res3, pair{t: ts, v: v})
lastTs = ts
res3 = append(res3, triple{st: it3.AtST(), t: ts, v: v})
}
// Seeking to last timestamp should work and it is a no-op.
require.Equal(t, ValFloat, it3.Seek(lastTs))
require.NoError(t, it3.Err())
require.Equal(t, exp[mid:], res3)
require.Equal(t, ValNone, it3.Seek(exp[len(exp)-1].t+1))
@ -129,6 +142,10 @@ func TestPool(t *testing.T) {
name: "float histogram",
encoding: EncFloatHistogram,
},
{
name: "xor opt st",
encoding: EncXOR2,
},
{
name: "invalid encoding",
encoding: EncNone,
@ -150,6 +167,8 @@ func TestPool(t *testing.T) {
b = &c.(*HistogramChunk).b
case EncFloatHistogram:
b = &c.(*FloatHistogramChunk).b
case EncXOR2:
b = &c.(*XOR2Chunk).b
default:
b = &c.(*XORChunk).b
}
@ -199,111 +218,3 @@ func (c fakeChunk) Encoding() Encoding {
func (c fakeChunk) Reset([]byte) {
c.t.Fatal("Reset should not be called")
}
func benchmarkIterator(b *testing.B, newChunk func() Chunk) {
const samplesPerChunk = 250
var (
t = int64(1234123324)
v = 1243535.123
exp []pair
)
for range samplesPerChunk {
// t += int64(rand.Intn(10000) + 1)
t += int64(1000)
// v = rand.Float64()
v += float64(100)
exp = append(exp, pair{t: t, v: v})
}
chunk := newChunk()
{
a, err := chunk.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
j := 0
for _, p := range exp {
if j > 250 {
break
}
a.Append(0, p.t, p.v)
j++
}
}
b.ReportAllocs()
var res float64
var it Iterator
for i := 0; b.Loop(); {
it := chunk.Iterator(it)
for it.Next() == ValFloat {
_, v := it.At()
res = v
i++
}
if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
require.NoError(b, err)
}
_ = res
}
}
func newXORChunk() Chunk {
return NewXORChunk()
}
func BenchmarkXORIterator(b *testing.B) {
benchmarkIterator(b, newXORChunk)
}
func BenchmarkXORAppender(b *testing.B) {
r := rand.New(rand.NewSource(1))
b.Run("constant", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return 1000, 0
}, newXORChunk)
})
b.Run("random steps", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return int64(r.Intn(100) - 50 + 15000), // 15 seconds +- up to 100ms of jitter.
float64(r.Intn(100) - 50) // Varying from -50 to +50 in 100 discrete steps.
}, newXORChunk)
})
b.Run("random 0-1", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return int64(r.Intn(100) - 50 + 15000), // 15 seconds +- up to 100ms of jitter.
r.Float64() // Random between 0 and 1.0.
}, newXORChunk)
})
}
func benchmarkAppender(b *testing.B, deltas func() (int64, float64), newChunk func() Chunk) {
var (
t = int64(1234123324)
v = 1243535.123
)
const nSamples = 120 // Same as tsdb.DefaultSamplesPerChunk.
var exp []pair
for range nSamples {
dt, dv := deltas()
t += dt
v += dv
exp = append(exp, pair{t: t, v: v})
}
b.ReportAllocs()
for b.Loop() {
c := newChunk()
a, err := c.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
for _, p := range exp {
a.Append(0, p.t, p.v)
}
}
}

View File

@ -0,0 +1,156 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram"
)
// testChunkSTHandling tests handling of start times in chunks.
// It uses 0-4 samples with timestamp 1000,2000,3000,4000 and monotonically
// increasing start times that are chosen from 0-(ts-500) for each sample.
// All combinations of start times are tested for each number of samples.
func testChunkSTHandling(t *testing.T, vt ValueType, chunkFactory func() Chunk) {
	// sampleAppend appends one sample of the given value type, mapping v onto
	// the sample value (and, for histograms, onto Sum/Count).
	sampleAppend := func(app Appender, vt ValueType, st, ts int64, v float64) {
		switch vt {
		case ValFloat:
			app.Append(st, ts, v)
		case ValHistogram:
			_, recoded, _, err := app.AppendHistogram(nil, st, ts, &histogram.Histogram{Sum: v, Count: uint64(v * 10)}, false)
			require.NoError(t, err)
			require.False(t, recoded)
		case ValFloatHistogram:
			_, recoded, _, err := app.AppendFloatHistogram(nil, st, ts, &histogram.FloatHistogram{Sum: v, Count: v * 10}, false)
			require.NoError(t, err)
			require.False(t, recoded)
		default:
			t.Fatalf("unsupported value type %v", vt)
		}
	}
	// get reads the current sample back from the iterator as (st, t, value).
	get := func(it Iterator, vt ValueType) (int64, int64, float64) {
		switch vt {
		case ValFloat:
			ts, v := it.At()
			return it.AtST(), ts, v
		case ValHistogram:
			ts, h := it.AtHistogram(nil)
			return it.AtST(), ts, float64(h.Sum)
		case ValFloatHistogram:
			ts, fh := it.AtFloatHistogram(nil)
			return it.AtST(), ts, fh.Sum
		default:
			t.Fatalf("unsupported value type %v", vt)
			return 0, 0, 0
		}
	}

	// runCase appends samples to a fresh chunk and checks that both the
	// original chunk and one resumed from a byte clone (the appender-recovery
	// path) iterate back exactly the appended (st, t, v) values.
	runCase := func(t *testing.T, samples []triple) {
		chunk := chunkFactory()
		app, err := chunk.Appender()
		require.NoError(t, err)

		var clone []byte
		for i, s := range samples {
			if i == len(samples)-1 {
				// Snapshot the bytes just before the last append so the clone
				// can redo that append through a recovered appender.
				clone = append(clone, chunk.Bytes()...)
			}
			sampleAppend(app, vt, s.st, s.t, s.v)
		}

		chunksToTest := []Chunk{chunk}
		if len(samples) > 0 {
			// If there are samples, also test that appending to a chunk cloned from the original chunk works correctly.
			// This tests resuming the appender from a previous chunk.
			cloneChunk := chunkFactory()
			cloneChunk.Reset(clone)
			cloneApp, err := cloneChunk.Appender()
			require.NoError(t, err)
			sampleAppend(cloneApp, vt, samples[len(samples)-1].st, samples[len(samples)-1].t, samples[len(samples)-1].v)
			chunksToTest = append(chunksToTest, cloneChunk)
		}
		printChunkName := func(i int) string {
			if i == 0 {
				return "original"
			}
			return "cloned"
		}

		for ci, chk := range chunksToTest {
			require.Equal(t, len(samples), chk.NumSamples(), "%s chunk: number of samples mismatch", printChunkName(ci))
			it := chk.Iterator(nil)
			for i, s := range samples {
				require.Equal(t, vt, it.Next(), "%s[%d]: value type mismatch", printChunkName(ci), i)
				st, ts, f := get(it, vt)
				require.Equal(t, s.t, ts, "%s[%d]: timestamp mismatch", printChunkName(ci), i)
				require.Equal(t, s.st, st, "%s[%d]: start time mismatch", printChunkName(ci), i)
				require.InDelta(t, s.v, f, 1e-9, "%s[%d]: value mismatch", printChunkName(ci), i)
			}
			require.Equal(t, ValNone, it.Next())
			require.NoError(t, it.Err())
		}
	}

	t.Run("manual for debugging", func(t *testing.T) {
		samples := []triple{
			{st: 0, t: 1000, v: 1.5},
			{st: 0, t: 2000, v: 2.5},
			{st: 0, t: 3000, v: 3.5},
			{st: 0, t: 4000, v: 4.5},
		}
		runCase(t, samples)
	})

	stTimes := []int64{0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000}
	ts := func(j int) int64 {
		return int64(1000 * (j + 1))
	}
	for numberOfSamples := range 5 {
		samples := make([]triple, numberOfSamples)
		// sampleSTidx is an odometer over stTimes indices, one digit per sample.
		sampleSTidx := make([]int, numberOfSamples)
		for {
			for j := range numberOfSamples {
				samples[j] = triple{
					st: stTimes[sampleSTidx[j]],
					t:  ts(j),
					v:  float64(j) + 0.5,
				}
			}
			t.Run(fmt.Sprintf("%v", samples), func(t *testing.T) {
				runCase(t, samples)
			})
			// Advance the odometer: bump the right-most digit whose ST is
			// still below its sample's timestamp; reset the digits after it.
			exhausted := true
			for j := numberOfSamples - 1; j >= 0; j-- {
				if stTimes[sampleSTidx[j]] < ts(j) {
					sampleSTidx[j]++
					exhausted = false
					break
				}
				sampleSTidx[j] = 0
			}
			if exhausted {
				break
			}
		}
	}
}

889
tsdb/chunkenc/xor2.go Normal file
View File

@ -0,0 +1,889 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// XOR2Chunk implements XOR encoding with joint timestamp+value control bits
// and byte-packed dod encoding for efficient appending. It also has an extra
// header byte after the sample count to allow for optionally encoding start
// timestamp (ST).
//
// Control prefix for samples >= 2:
//
// 0 → dod=0 AND value unchanged (1 bit)
// 10 → dod=0, value changed (2 bits, then value encoding)
// 110 → dod≠0, 13-bit signed [-4096, 4095] (prefix+dod packed into 2 bytes)
// 1110 → dod≠0, 20-bit signed [-524288, 524287] (prefix+dod packed into 3 bytes)
// 11110 → dod≠0, 64-bit escape (5+64 bits, then value encoding)
// 11111 → dod=0, stale NaN (5 bits, no value field)
//
// The dod bins are widened so that prefix+dod aligns to byte boundaries,
// replacing writeBit calls with writeByte for common cases.
//
// Value encoding for the dod≠0 cases (`<varbit_xor2>`):
//
// 0 → value unchanged
// 10 → reuse previous leading/trailing window
// 110 → new leading/trailing window
// 111 → stale NaN
//
// Value encoding for the dod=0, value-changed case (`<varbit_xor2_nn>`):
//
// 0 → reuse previous leading/trailing window
// 1 → new leading/trailing window
//
// Start timestamp (ST) encoding:
//
// 1-byte ST header (at b[chunkHeaderSize]) layout:
//
// bit 7 (0x80): firstSTKnown — ST for the first sample is present in the stream
// bits 6-0: firstSTChangeOn — sample index where the first ST change begins
//
// When no ST is provided (st == 0 always), the header stays 0x00 and the
// chunk has no additional bits in it.
//
// When ST is present, the ST delta (prevT - st) is appended after each
// sample's joint timestamp+value encoding using putVarbitInt.
package chunkenc
import (
"encoding/binary"
"math"
"math/bits"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/value"
)
const (
	// chunkSTHeaderSize is the size in bytes of the ST header that follows
	// the 2-byte sample-count header in an XOR2 chunk.
	chunkSTHeaderSize = 1
	// maxFirstSTChangeOn is the largest sample index representable in the
	// 7 low bits of the ST header (firstSTChangeOn).
	maxFirstSTChangeOn = 0x7F
)
// writeHeaderFirstSTKnown sets the ST header byte to the firstSTKnown flag
// (bit 7), clearing the firstSTChangeOn bits in the process.
func writeHeaderFirstSTKnown(b []byte) {
	b[0] = 1 << 7
}
// writeHeaderFirstSTChangeOn records, in the 7 low bits of the ST header
// byte, the sample index at which the first ST change occurs. Bit 7 stays
// reserved for the firstSTKnown flag and is left untouched.
func writeHeaderFirstSTChangeOn(b []byte, firstSTChangeOn uint16) {
	if firstSTChangeOn > maxFirstSTChangeOn {
		// Out of the 7-bit range. This should never happen; writing it would
		// corrupt the header (ST already skipped but shouldn't), so drop it.
		return
	}
	b[0] |= byte(firstSTChangeOn)
}
// readSTHeader decodes the 1-byte ST header: bit 7 is the firstSTKnown flag
// and the 7 low bits are the firstSTChangeOn sample index. The general mask
// expression also covers the 0x00 and 0x80 special cases, so no separate
// branches are needed.
func readSTHeader(b []byte) (firstSTKnown bool, firstSTChangeOn uint8) {
	hdr := b[0]
	return hdr&0x80 != 0, hdr & 0x7F
}
// XOR2Chunk holds XOR2 encoded samples with optional start
// timestamp per chunk or per sample.
type XOR2Chunk struct {
	b bstream // Backing bit stream; starts with the chunk header plus the 1-byte ST header.
}
// NewXOR2Chunk returns a new, empty chunk with XOR2 encoding. The stream is
// pre-sized with room for the chunk header and the ST header byte.
func NewXOR2Chunk() *XOR2Chunk {
	return &XOR2Chunk{b: bstream{
		stream: make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize),
	}}
}
// Reset resets the chunk to operate on the given stream.
func (c *XOR2Chunk) Reset(stream []byte) {
	c.b.Reset(stream)
}
// Encoding returns the encoding type, always EncXOR2.
func (*XOR2Chunk) Encoding() Encoding {
	return EncXOR2
}
// Bytes returns the underlying byte slice of the chunk.
func (c *XOR2Chunk) Bytes() []byte {
	return c.b.bytes()
}
// NumSamples returns the number of samples in the chunk, stored as a
// 2-byte big-endian counter at the start of the chunk header.
func (c *XOR2Chunk) NumSamples() int {
	return int(binary.BigEndian.Uint16(c.b.bytes()))
}
// Compact implements the Chunk interface. It reallocates the stream to drop
// excess capacity once the slack exceeds the compaction threshold.
func (c *XOR2Chunk) Compact() {
	l := len(c.b.stream)
	if cap(c.b.stream) <= l+chunkCompactCapacityThreshold {
		return
	}
	c.b.stream = append(make([]byte, 0, l), c.b.stream...)
}
// Appender implements the Chunk interface. For a non-empty chunk it replays
// every existing sample with an iterator in order to reconstruct the
// appender state (previous timestamp/value, deltas, leading/trailing
// window, and ST bookkeeping).
func (c *XOR2Chunk) Appender() (Appender, error) {
	if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize {
		// Empty chunk. leading=0xff marks "no XOR window established yet".
		return &xor2Appender{
			b:       &c.b,
			t:       math.MinInt64,
			leading: 0xff,
		}, nil
	}
	it := c.iterator(nil)
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}
	// Set the bit position for continuing writes. The iterator's reader tracks
	// how many bits remain unread in the last byte.
	c.b.count = it.br.valid
	a := &xor2Appender{
		b:               &c.b,
		st:              it.st,
		t:               it.t,
		v:               it.baselineV,
		tDelta:          it.tDelta,
		stDiff:          it.stDiff,
		leading:         it.leading,
		trailing:        it.trailing,
		numTotal:        binary.BigEndian.Uint16(c.b.bytes()),
		firstSTKnown:    it.firstSTKnown,
		firstSTChangeOn: uint16(it.firstSTChangeOn),
	}
	return a, nil
}
// iterator returns an xor2Iterator over the chunk, reusing it when it is
// already an xor2Iterator and allocating a fresh one otherwise.
func (c *XOR2Chunk) iterator(it Iterator) *xor2Iterator {
	iter, ok := it.(*xor2Iterator)
	if !ok {
		iter = &xor2Iterator{}
	}
	iter.Reset(c.b.bytes())
	return iter
}
// Iterator implements the Chunk interface, reusing it when possible.
func (c *XOR2Chunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}
// xor2Appender appends samples with optional start timestamps using
// the XOR2 joint control bit encoding for regular timestamp and value,
// and putVarbitInt for the start timestamp delta.
type xor2Appender struct {
	b *bstream

	st              int64   // ST of the previously appended sample.
	t               int64   // Timestamp of the previously appended sample.
	v               float64 // Last non-stale value (XOR baseline).
	tDelta          uint64  // Timestamp delta of the previous sample.
	stDiff          int64   // prevT - st for the previous sample.
	leading         uint8   // Leading zeros of the current XOR window; 0xff = no window yet.
	trailing        uint8   // Trailing zeros of the current XOR window.
	numTotal        uint16  // Number of samples appended so far.
	firstSTChangeOn uint16  // Sample index of the first ST change; 0 = none recorded.
	firstSTKnown    bool    // Whether sample 0 carried a non-zero ST.
}
// Append adds the sample (st, t, v) to the chunk and updates the running
// appender state plus the 2-byte sample counter in the chunk header.
//
// Sample 0 is stored as varint(t) + raw 64-bit value, sample 1 as
// uvarint(tDelta) + XOR value delta, and samples >= 2 use the joint XOR2
// control encoding (see encodeJoint). ST data, when present, is appended
// after the sample's timestamp/value bits.
func (a *xor2Appender) Append(st, t int64, v float64) {
	var (
		tDelta uint64
		stDiff int64
	)
	switch a.numTotal {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBitsFast(math.Float64bits(v), 64)
		if st != 0 {
			// First sample has a known ST: store varint(t-st) and flag it
			// in the ST header byte.
			for _, b := range buf[:binary.PutVarint(buf, t-st)] {
				a.b.writeByte(b)
			}
			a.firstSTKnown = true
			writeHeaderFirstSTKnown(a.b.bytes()[chunkHeaderSize:])
		}
	case 1:
		tDelta = uint64(t - a.t)
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}
		a.writeVDelta(v)
		if st != a.st {
			// ST changed already on sample 1: record the absolute diff
			// prevT - st and note the change index in the header.
			stDiff = a.t - st
			a.firstSTChangeOn = 1
			writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], 1)
			putVarbitInt(a.b, stDiff)
		}
	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)
		// Fast path: no ST involvement at all. The numTotal check drops to
		// the slow path just before the 7-bit header index would overflow.
		if st == 0 && a.numTotal != maxFirstSTChangeOn && a.firstSTChangeOn == 0 && !a.firstSTKnown {
			a.encodeJoint(dod, v)
			a.t = t
			if !value.IsStaleNaN(v) {
				// Stale markers never replace the XOR baseline value.
				a.v = v
			}
			a.tDelta = tDelta
			a.numTotal++
			binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
			return
		}
		// Slow path: ST may be involved.
		a.encodeJoint(dod, v)
		if a.firstSTChangeOn == 0 {
			if st != a.st || a.numTotal == maxFirstSTChangeOn {
				// First ST change: record prevT - st.
				stDiff = a.t - st
				a.firstSTChangeOn = a.numTotal
				writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal)
				putVarbitInt(a.b, stDiff)
			}
		} else {
			// Later samples store the delta against the previous stDiff.
			stDiff = a.t - st
			putVarbitInt(a.b, stDiff-a.stDiff)
		}
	}
	a.st = st
	a.t = t
	if !value.IsStaleNaN(v) {
		a.v = v
	}
	a.tDelta = tDelta
	a.stDiff = stDiff
	a.numTotal++
	binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
}
// encodeJoint writes the XOR2 joint timestamp+value control sequence for
// samples >= 2. The dod bins are byte-aligned where possible so the common
// cases use whole-byte writes instead of bit-by-bit writes.
func (a *xor2Appender) encodeJoint(dod int64, v float64) {
	if dod == 0 {
		if value.IsStaleNaN(v) {
			// `11111`: dod=0 stale NaN; no value field follows.
			a.b.writeBitsFast(0b11111, 5)
			return
		}
		vbits := math.Float64bits(v) ^ math.Float64bits(a.v)
		if vbits == 0 {
			// `0`: dod=0 and value unchanged — a single bit in total.
			a.b.writeBit(zero)
			return
		}
		// `10`: dod=0, value changed; vbits is known non-zero here.
		a.b.writeBitsFast(0b10, 2)
		a.writeVDeltaKnownNonZero(vbits)
		return
	}
	switch {
	case dod >= -(1<<12) && dod <= (1<<12)-1:
		// 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total.
		a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F)
		a.b.writeByte(byte(uint64(dod)))
	case dod >= -(1<<19) && dod <= (1<<19)-1:
		// 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total.
		a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F)
		a.b.writeByte(byte(uint64(dod) >> 8))
		a.b.writeByte(byte(uint64(dod)))
	default:
		// 64-bit escape (rare): `11110`.
		a.b.writeBitsFast(0b11110, 5)
		a.b.writeBitsFast(uint64(dod), 64)
	}
	a.writeVDelta(v)
}
// writeVDelta encodes the value delta for the dod≠0 case:
//
//	`0`   → value unchanged
//	`10`  → reuse previous leading/trailing window
//	`110` → new leading/trailing window
//	`111` → stale NaN
func (a *xor2Appender) writeVDelta(v float64) {
	if value.IsStaleNaN(v) {
		a.b.writeBitsFast(0b111, 3)
		return
	}
	delta := math.Float64bits(v) ^ math.Float64bits(a.v)
	if delta == 0 {
		a.b.writeBit(zero)
		return
	}
	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))
	// Clamp to 31 so the count fits the 5-bit leading field.
	if newLeading >= 32 {
		newLeading = 31
	}
	// Reuse the previous window when the delta fits inside it; 0xff leading
	// means no window has been established yet.
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBitsFast(0b10, 2)
		a.b.writeBitsFast(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}
	a.leading, a.trailing = newLeading, newTrailing
	a.b.writeBitsFast(0b110, 3)
	a.b.writeBitsFast(uint64(newLeading), 5)
	// sigbits=64 wraps to 0 in the 6-bit field; the reader maps 0 back to 64.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBitsFast(uint64(sigbits), 6)
	a.b.writeBitsFast(delta>>newTrailing, int(sigbits))
}
// writeVDeltaKnownNonZero encodes a precomputed value XOR delta for the
// dod=0, value-changed case. delta must be non-zero or staleNaN. Stale NaN with dod=0 is
// handled at the joint control level (`11111`) and never reaches this function.
//
// Encoding:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
func (a *xor2Appender) writeVDeltaKnownNonZero(delta uint64) {
	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))
	// Clamp to 31 so the count fits the 5-bit leading field.
	if newLeading >= 32 {
		newLeading = 31
	}
	// Reuse the previous window when the delta fits inside it; 0xff leading
	// means no window has been established yet.
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBit(zero)
		a.b.writeBitsFast(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}
	a.leading, a.trailing = newLeading, newTrailing
	a.b.writeBit(one)
	a.b.writeBitsFast(uint64(newLeading), 5)
	// sigbits=64 wraps to 0 in the 6-bit field; the reader maps 0 back to 64.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBitsFast(uint64(sigbits), 6)
	a.b.writeBitsFast(delta>>newTrailing, int(sigbits))
}
// AppendHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a histogram sample to a float chunk")
}
// AppendFloatHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a float histogram sample to a float chunk")
}
// xor2Iterator decodes XOR2 chunks.
type xor2Iterator struct {
	br              bstreamReader
	numTotal        uint16 // Total samples in the chunk (from the header).
	numRead         uint16 // Samples decoded so far.
	firstSTKnown    bool   // Sample 0 carries an ST (from the ST header).
	firstSTChangeOn uint8  // Sample index of the first ST change; 0 = none.
	leading         uint8  // Leading zeros of the current XOR window.
	trailing        uint8  // Trailing zeros of the current XOR window.
	st              int64  // ST of the current sample.
	t               int64  // Timestamp of the current sample.
	val             float64
	tDelta          uint64
	stDiff          int64 // Accumulated prevT - st.
	err             error
	baselineV       float64 // Last non-stale value for XOR baseline.
}
// Seek advances the iterator until the current timestamp is at least t,
// decoding at least one sample. It returns ValNone once the chunk is
// exhausted or a decode error occurred.
func (it *xor2Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}
	for it.numRead == 0 || it.t < t {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}
// At returns the current sample's timestamp and value.
func (it *xor2Iterator) At() (int64, float64) {
	return it.t, it.val
}
// AtHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
	panic("cannot call xor2Iterator.AtHistogram")
}
// AtFloatHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	panic("cannot call xor2Iterator.AtFloatHistogram")
}
// AtT returns the current sample's timestamp.
func (it *xor2Iterator) AtT() int64 {
	return it.t
}
// AtST returns the current sample's start timestamp (0 when unknown).
func (it *xor2Iterator) AtST() int64 {
	return it.st
}
// Err returns the first decoding error encountered, if any.
func (it *xor2Iterator) Err() error {
	return it.err
}
// Reset re-initializes the iterator to decode the chunk bytes b from the
// beginning, clearing all previously accumulated decoding state.
func (it *xor2Iterator) Reset(b []byte) {
	*it = xor2Iterator{
		br:       newBReader(b[chunkHeaderSize+chunkSTHeaderSize:]),
		numTotal: binary.BigEndian.Uint16(b),
	}
	it.firstSTKnown, it.firstSTChangeOn = readSTHeader(b[chunkHeaderSize:])
}
// Next advances the iterator to the next sample. Sample 0 and 1 use their
// dedicated encodings (varint / uvarint); samples >= 2 decode the joint
// XOR2 control sequence, followed by optional ST data.
func (it *xor2Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}
	if it.numRead == 0 {
		// Sample 0: varint timestamp + raw 64-bit value.
		t, err := it.br.readVarint()
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)
		if !value.IsStaleNaN(it.val) {
			it.baselineV = it.val
		}
		// Optional ST for sample 0.
		if it.firstSTKnown {
			stDiff, err := it.br.readVarint()
			if err != nil {
				it.err = err
				return ValNone
			}
			it.st = t - stDiff
		}
		it.numRead++
		return ValFloat
	}
	if it.numRead == 1 {
		// Sample 1: uvarint timestamp delta + XOR value delta.
		tDelta, err := it.br.readUvarint()
		if err != nil {
			it.err = err
			return ValNone
		}
		prevT := it.t
		it.tDelta = tDelta
		it.t += int64(it.tDelta)
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
		// Optional ST delta for sample 1.
		if it.firstSTChangeOn == 1 {
			sdod, err := readVarbitInt(&it.br)
			if err != nil {
				it.err = err
				return ValNone
			}
			it.stDiff = sdod
			it.st = prevT - sdod
		}
		it.numRead++
		return ValFloat
	}
	// Sample N >= 2: read joint XOR2 control, then optional ST data.
	prevT := it.t
	savedNumRead := it.numRead
	ctrl, ok := it.br.readXOR2ControlFast()
	if !ok {
		var err error
		ctrl, err = it.br.readXOR2Control()
		if err != nil {
			it.err = err
			return ValNone
		}
	}
	switch ctrl {
	case 0:
		// dod=0, value unchanged.
		it.t += int64(it.tDelta)
		it.val = it.baselineV
	case 1:
		// dod=0, value changed.
		it.t += int64(it.tDelta)
		if err := it.decodeValueKnownNonZero(); err != nil {
			it.err = err
			return ValNone
		}
	case 2:
		// 13-bit dod.
		if err := it.readDod(13); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 3:
		// 20-bit dod.
		if err := it.readDod(20); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 4:
		// 64-bit escape.
		if err := it.readDod(64); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	default:
		// dod=0, stale NaN.
		it.t += int64(it.tDelta)
		it.val = math.Float64frombits(value.StaleNaN)
	}
	// Optional ST data, appended after the joint timestamp+value encoding.
	// The ST delta was encoded as (prevT - st), using the PREVIOUS sample's t.
	if it.firstSTChangeOn > 0 && savedNumRead >= uint16(it.firstSTChangeOn) {
		sdod, err := readVarbitInt(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		if savedNumRead == uint16(it.firstSTChangeOn) {
			// The first recorded ST value is absolute, not a delta.
			it.stDiff = sdod
		} else {
			it.stDiff += sdod
		}
		it.st = prevT - it.stDiff
	}
	it.numRead++
	return ValFloat
}
// readDod reads a signed dod of width w bits and updates it.tDelta and it.t.
func (it *xor2Iterator) readDod(w uint8) error {
	var b uint64
	// Fast path: the bits are already buffered in the 64-bit read buffer.
	if it.br.valid >= w {
		it.br.valid -= w
		b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1)
	} else {
		var err error
		b, err = it.br.readBits(w)
		if err != nil {
			return err
		}
	}
	// Sign-extend values whose top bit is set (two's complement in w bits).
	if w < 64 && b >= (1<<(w-1)) {
		b -= 1 << w
	}
	it.tDelta = uint64(int64(it.tDelta) + int64(b))
	it.t += int64(it.tDelta)
	return nil
}
// decodeValue reads the XOR2 value encoding for the dod≠0 case:
//
//	`0`   → value unchanged
//	`10`  → reuse previous leading/trailing window
//	`110` → new leading/trailing window
//	`111` → stale NaN
func (it *xor2Iterator) decodeValue() error {
	// Fast path: 3 bits available — read the full control prefix in one shot.
	// Encoding: `0`=unchanged, `10`=reuse window, `110`=new window, `111`=stale NaN.
	if it.br.valid >= 3 {
		ctrl := (it.br.buffer >> (it.br.valid - 3)) & 0x7
		if ctrl&0x4 == 0 {
			// `0xx`: value unchanged, consume 1 bit.
			it.br.valid--
			it.val = it.baselineV
			return nil
		}
		if ctrl&0x6 == 0x4 {
			// `10x`: reuse previous leading/trailing window, consume 2 bits.
			it.br.valid -= 2
			sz := uint8(64 - int(it.leading) - int(it.trailing))
			var valueBits uint64
			if it.br.valid >= sz {
				it.br.valid -= sz
				valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
			} else {
				var err error
				valueBits, err = it.br.readBits(sz)
				if err != nil {
					return err
				}
			}
			vbits := math.Float64bits(it.baselineV)
			vbits ^= valueBits << it.trailing
			it.val = math.Float64frombits(vbits)
			it.baselineV = it.val
			return nil
		}
		// `11x`: consume 3 bits.
		it.br.valid -= 3
		if ctrl == 0x6 {
			// `110`: new leading/trailing window.
			return it.decodeNewLeadingTrailing()
		}
		// `111`: stale NaN.
		it.val = math.Float64frombits(value.StaleNaN)
		return nil
	}
	// Slow path: fewer than 3 bits buffered (rare, only near buffer refills).
	// Read the control prefix one bit at a time.
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `0` → value unchanged.
		it.val = it.baselineV
		return nil
	}
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `10` → reuse previous leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `110` → new leading/trailing window.
		return it.decodeNewLeadingTrailing()
	}
	// `111` → stale NaN.
	it.val = math.Float64frombits(value.StaleNaN)
	return nil
}
// decodeValueKnownNonZero reads the XOR2 value encoding for the dod=0,
// value-changed case:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
func (it *xor2Iterator) decodeValueKnownNonZero() error {
	// sz is the width of the current XOR window's significant bits.
	sz := uint8(64 - int(it.leading) - int(it.trailing))
	// Fast path: combine the 1-bit reuse/new-window control read with the
	// sz-bit value read into a single buffer operation.
	if it.br.valid >= 1+sz {
		ctrlBit := (it.br.buffer >> (it.br.valid - 1)) & 1
		if ctrlBit == 0 { // `0`: reuse previous leading/trailing window.
			it.br.valid -= 1 + sz
			valueBits := (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
			vbits := math.Float64bits(it.baselineV)
			vbits ^= valueBits << it.trailing
			it.val = math.Float64frombits(vbits)
			it.baselineV = it.val
			return nil
		}
		// `1`: new leading/trailing window.
		it.br.valid--
		return it.decodeNewLeadingTrailing()
	}
	// Slow path: read control bit then value bits separately.
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `0` → reuse previous leading/trailing window.
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}
	// `1` → new leading/trailing window.
	return it.decodeNewLeadingTrailing()
}
// decodeNewLeadingTrailing reads a new leading/sigbits/value triple and
// updates it.leading, it.trailing, it.val, and it.baselineV.
func (it *xor2Iterator) decodeNewLeadingTrailing() error {
	var newLeading, sigbits uint64
	// Fast path: read leading (5 bits) and sigbits (6 bits) together as 11 bits.
	if it.br.valid >= 11 {
		val := (it.br.buffer >> (it.br.valid - 11)) & 0x7ff
		it.br.valid -= 11
		newLeading = val >> 6
		sigbits = val & 0x3f
	} else {
		var err error
		newLeading, err = it.br.readBits(5)
		if err != nil {
			return err
		}
		sigbits, err = it.br.readBits(6)
		if err != nil {
			return err
		}
	}
	it.leading = uint8(newLeading)
	// The encoder stores a full 64-bit window as sigbits 0; map it back.
	if sigbits == 0 {
		sigbits = 64
	}
	it.trailing = 64 - it.leading - uint8(sigbits)
	n := uint8(sigbits)
	var valueBits uint64
	if it.br.valid >= n {
		it.br.valid -= n
		valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1)
	} else {
		var err error
		valueBits, err = it.br.readBits(n)
		if err != nil {
			return err
		}
	}
	vbits := math.Float64bits(it.baselineV)
	vbits ^= valueBits << it.trailing
	it.val = math.Float64frombits(vbits)
	it.baselineV = it.val
	return nil
}

527
tsdb/chunkenc/xor2_test.go Normal file
View File

@ -0,0 +1,527 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"fmt"
"math"
"math/bits"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/value"
)
// newXOR2IteratorForPayload builds an xor2Iterator whose bit reader is
// positioned at the start of the bits written by payload, after skipping
// `padding` zero bits. setup, when non-nil, pre-seeds iterator state such
// as the XOR baseline or the leading/trailing window.
func newXOR2IteratorForPayload(t *testing.T, padding int, payload func(*bstream), setup func(*xor2Iterator)) *xor2Iterator {
	t.Helper()
	var bs bstream
	if padding > 0 {
		bs.writeBitsFast(0, padding)
	}
	payload(&bs)
	// Add tail bytes so the reader initially fills a full 64-bit buffer.
	bs.writeBitsFast(0, 64)
	it := &xor2Iterator{}
	if setup != nil {
		setup(it)
	}
	it.br = newBReader(bs.bytes())
	if padding > 0 {
		_, err := it.br.readBits(uint8(padding))
		require.NoError(t, err)
	}
	return it
}
// writeXOR2NewWindowPayload writes the new-window triple (5-bit leading,
// 6-bit sigbits, then the significant value bits) for delta, mirroring the
// appender. A full 64-bit window is encoded with a sigbits field of 0.
func writeXOR2NewWindowPayload(bs *bstream, delta uint64) (leading, trailing uint8) {
	leading, trailing, sigbits := xor2DeltaWindow(delta)
	encodedSigbits := sigbits
	if sigbits == 64 {
		encodedSigbits = 0
	}
	bs.writeBitsFast(uint64(leading), 5)
	bs.writeBitsFast(uint64(encodedSigbits), 6)
	bs.writeBitsFast(delta>>trailing, int(sigbits))
	return leading, trailing
}
// xor2DeltaWindow computes the leading/trailing zero window for an XOR
// delta, clamping leading to 31 exactly like the appender does so it fits
// the 5-bit leading field.
func xor2DeltaWindow(delta uint64) (leading, trailing, sigbits uint8) {
	leading = uint8(bits.LeadingZeros64(delta))
	if leading > 31 {
		leading = 31
	}
	trailing = uint8(bits.TrailingZeros64(delta))
	sigbits = 64 - leading - trailing
	return leading, trailing, sigbits
}
// BenchmarkXor2Write measures appending 120 regularly spaced float samples
// (no ST) into a fresh XOR2 chunk per iteration.
func BenchmarkXor2Write(b *testing.B) {
	samples := make([]struct {
		t int64
		v float64
	}, 120)
	for i := range samples {
		samples[i].t = int64(i) * 1000
		samples[i].v = float64(i) + float64(i)/10 + float64(i)/100 + float64(i)/1000
	}
	b.ReportAllocs()
	for b.Loop() {
		c := NewXOR2Chunk()
		app, _ := c.Appender()
		for _, s := range samples {
			app.Append(0, s.t, s.v)
		}
	}
}
// BenchmarkXor2Read measures iterating a 120-sample XOR2 chunk (no ST),
// reusing the iterator across iterations.
func BenchmarkXor2Read(b *testing.B) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(b, err)
	for i := int64(0); i < 120*1000; i += 1000 {
		app.Append(0, i, float64(i)+float64(i)/10+float64(i)/100+float64(i)/1000)
	}
	b.ReportAllocs()
	var it Iterator
	for b.Loop() {
		var ts int64
		var v float64
		it = c.Iterator(it)
		for it.Next() != ValNone {
			ts, v = it.At()
		}
		_, _ = ts, v
	}
}
// TestXOR2Basic round-trips a handful of regularly spaced samples without
// ST and verifies they decode unchanged.
func TestXOR2Basic(t *testing.T) {
	chk := NewXOR2Chunk()
	app, err := chk.Appender()
	require.NoError(t, err)
	cases := []struct {
		ts  int64
		val float64
	}{
		{1000, 1.0},
		{2000, 2.0},
		{3000, 3.0},
		{4000, 4.0},
		{5000, 5.0},
	}
	for _, c := range cases {
		app.Append(0, c.ts, c.val)
	}
	it := chk.Iterator(nil)
	for _, want := range cases {
		require.Equal(t, ValFloat, it.Next())
		gotT, gotV := it.At()
		require.Equal(t, want.ts, gotT)
		require.Equal(t, want.val, gotV)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2WithStaleness round-trips samples interleaved with stale-NaN
// markers (dod=0 here), verifying markers survive while regular values
// decode exactly.
func TestXOR2WithStaleness(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	samples := []struct {
		t     int64
		v     float64
		stale bool
	}{
		{1000, 1.0, false},
		{2000, 2.0, false},
		{3000, math.Float64frombits(value.StaleNaN), true},
		{4000, 4.0, false},
		{5000, math.Float64frombits(value.StaleNaN), true},
		{6000, 6.0, false},
	}
	for _, s := range samples {
		app.Append(0, s.t, s.v)
	}
	it := c.Iterator(nil)
	for _, expected := range samples {
		require.Equal(t, ValFloat, it.Next())
		ts, v := it.At()
		require.Equal(t, expected.t, ts)
		if expected.stale {
			require.True(t, value.IsStaleNaN(v), "Expected stale NaN at ts=%d", ts)
		} else {
			require.Equal(t, expected.v, v)
		}
	}
	require.Equal(t, ValNone, it.Next())
}
func TestXOR2StaleWithDodNonZero(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	// Stale NaN samples where the timestamp dod is non-zero, exercising the
	// `111` value encoding path inside writeVDelta.
	samples := []struct {
		t     int64
		v     float64
		stale bool
	}{
		{1000, 1.0, false},
		{2000, 2.0, false},
		// dod = (3050 - 2000) - (2000 - 1000) = 1050 - 1000 = 50: stale with dod≠0.
		{3050, math.Float64frombits(value.StaleNaN), true},
		{4050, 4.0, false},
		{5050, 5.0, false},
	}
	for _, s := range samples {
		app.Append(0, s.t, s.v)
	}
	it := c.Iterator(nil)
	for _, expected := range samples {
		require.Equal(t, ValFloat, it.Next())
		ts, v := it.At()
		require.Equal(t, expected.t, ts)
		if expected.stale {
			require.True(t, value.IsStaleNaN(v), "Expected stale NaN at ts=%d", ts)
		} else {
			require.Equal(t, expected.v, v)
		}
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2IrregularTimestamps round-trips timestamps whose dods fall into
// the 13-bit and 20-bit encoding bins.
func TestXOR2IrregularTimestamps(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	// Timestamps with dod values spanning multiple encoding ranges.
	timestamps := []int64{
		1000, 2000, 3000,
		// dod in 13-bit range.
		3050, 4050, 5050,
		// dod in 20-bit range (large jitter).
		5050 + 100000, 5050 + 200000, 5050 + 300000,
		// Back to regular.
		5050 + 301000,
	}
	for _, ts := range timestamps {
		app.Append(0, ts, 1.0)
	}
	it := c.Iterator(nil)
	for _, expected := range timestamps {
		require.Equal(t, ValFloat, it.Next())
		ts, _ := it.At()
		require.Equal(t, expected, ts)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2LargeDod forces the rare 64-bit dod escape encoding: a jump of
// 1<<20 exceeds the 20-bit dod bin.
func TestXOR2LargeDod(t *testing.T) {
	chk := NewXOR2Chunk()
	app, err := chk.Appender()
	require.NoError(t, err)
	timestamps := []int64{0, 1000, 2000, 2000 + (1 << 20)}
	for _, ts := range timestamps {
		app.Append(0, ts, 1.0)
	}
	it := chk.Iterator(nil)
	for _, want := range timestamps {
		require.Equal(t, ValFloat, it.Next())
		got, _ := it.At()
		require.Equal(t, want, got)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2ChunkST runs the shared ST-handling test suite against XOR2 chunks.
func TestXOR2ChunkST(t *testing.T) {
	testChunkSTHandling(t, ValFloat, func() Chunk {
		return NewXOR2Chunk()
	})
}
// TestXOR2Chunk_MoreThan127Samples appends more samples than the 7-bit
// firstSTChangeOn header field can index, exercising the appender's forced
// switch off the no-ST fast path at index maxFirstSTChangeOn.
func TestXOR2Chunk_MoreThan127Samples(t *testing.T) {
	const afterMax = maxFirstSTChangeOn + 3
	t.Run("zero ST", func(t *testing.T) {
		chunk := NewXOR2Chunk()
		app, err := chunk.Appender()
		require.NoError(t, err)
		for i := range afterMax {
			app.Append(0, int64(i*10+1), float64(i)*1.5)
		}
		it := chunk.Iterator(nil)
		for i := range afterMax {
			require.Equal(t, ValFloat, it.Next())
			st := it.AtST()
			ts, v := it.At()
			require.Equal(t, int64(0), st)
			require.Equal(t, int64(i*10+1), ts)
			require.Equal(t, float64(i)*1.5, v)
		}
		require.Equal(t, ValNone, it.Next())
		require.NoError(t, it.Err())
	})
	t.Run("non-zero ST after 127", func(t *testing.T) {
		chunk := NewXOR2Chunk()
		app, err := chunk.Appender()
		require.NoError(t, err)
		for i := range afterMax {
			// Only the very last sample carries a non-zero ST.
			st := int64(0)
			if i == afterMax-1 {
				st = int64((afterMax - 1) * 10)
			}
			app.Append(st, int64(i*10+1), float64(i)*1.5)
		}
		it := chunk.Iterator(nil)
		for i := range afterMax {
			require.Equal(t, ValFloat, it.Next())
			st := it.AtST()
			ts, v := it.At()
			if i == afterMax-1 {
				require.Equal(t, int64((afterMax-1)*10), st)
			} else {
				require.Equal(t, int64(0), st)
			}
			require.Equal(t, int64(i*10+1), ts)
			require.Equal(t, float64(i)*1.5, v)
		}
		require.Equal(t, ValNone, it.Next())
		require.NoError(t, it.Err())
	})
}
// TestXOR2DecodeFunctionsAcrossPadding exercises decodeValue,
// decodeValueKnownNonZero, and decodeNewLeadingTrailing across all logical
// cases × all 64 bit-buffer alignments (padding 0..63). Padding controls the
// number of bits that precede the payload in the stream, which determines
// how many bits remain in the 64-bit read buffer when the decode function is
// called. This Cartesian product ensures both the fast path (enough bits
// buffered for a single-shot read) and the slow path (bits span a buffer
// refill) are exercised for every case.
func TestXOR2DecodeFunctionsAcrossPadding(t *testing.T) {
	const baseline = 1234.5
	type testCase struct {
		name    string
		payload func(*bstream)
		setup   func(*xor2Iterator)
		assert  func(*testing.T, *xor2Iterator)
	}
	// runCases runs every case under every padding, invoking fn (the decode
	// function under test) on a freshly positioned iterator.
	runCases := func(t *testing.T, cases []testCase, fn func(*xor2Iterator) error) {
		t.Helper()
		for _, tc := range cases {
			t.Run(tc.name, func(t *testing.T) {
				for padding := range 64 {
					t.Run(fmt.Sprintf("padding=%d", padding), func(t *testing.T) {
						it := newXOR2IteratorForPayload(t, padding, tc.payload, tc.setup)
						require.NoError(t, fn(it))
						tc.assert(t, it)
					})
				}
			})
		}
	}
	// decodeValue: `0`=unchanged, `10`=reuse window, `110`=new window, `111`=stale NaN.
	t.Run("decodeValue", func(t *testing.T) {
		reuseD := uint64(0x000ABCDE000000)
		rL, rT, rS := xor2DeltaWindow(reuseD)
		// Two new-window variants: full-width sigbits (encoded as 0) and small
		// sigbits, to cover both value-bits read paths inside decodeNewLeadingTrailing.
		newDFull := uint64(0xFEDCBA9876543211)
		nLFull, nTFull, _ := xor2DeltaWindow(newDFull)
		newDSmall := uint64(0x000ABCDE000000)
		nLSmall, nTSmall, _ := xor2DeltaWindow(newDSmall)
		runCases(t, []testCase{
			{
				name:    "unchanged",
				payload: func(bs *bstream) { bs.writeBit(zero) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, baseline, it.val)
					require.Equal(t, baseline, it.baselineV)
				},
			},
			{
				name: "reuse_window",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b10, 2)
					bs.writeBitsFast(reuseD>>rT, int(rS))
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = rL, rT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ reuseD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, rL, it.leading)
					require.Equal(t, rT, it.trailing)
				},
			},
			{
				name: "new_window_full_sigbits",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b110, 3)
					writeXOR2NewWindowPayload(bs, newDFull)
				},
				setup: func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ newDFull)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, nLFull, it.leading)
					require.Equal(t, nTFull, it.trailing)
				},
			},
			{
				name: "new_window_small_sigbits",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b110, 3)
					writeXOR2NewWindowPayload(bs, newDSmall)
				},
				setup: func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ newDSmall)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, nLSmall, it.leading)
					require.Equal(t, nTSmall, it.trailing)
				},
			},
			{
				name:    "stale_nan",
				payload: func(bs *bstream) { bs.writeBitsFast(0b111, 3) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.True(t, value.IsStaleNaN(it.val))
					// Stale markers must not overwrite the XOR baseline.
					require.Equal(t, baseline, it.baselineV)
				},
			},
		}, (*xor2Iterator).decodeValue)
	})
	// decodeValueKnownNonZero: `0`=reuse window, `1`=new window.
	// The new_window case uses real leading/trailing (not 0xff) so that sz is
	// small enough for the fast path (valid >= 1+sz) to be reached with ctrlBit=1.
	t.Run("decodeValueKnownNonZero", func(t *testing.T) {
		delta := uint64(0x000ABCDE000000)
		dL, dT, dS := xor2DeltaWindow(delta)
		runCases(t, []testCase{
			{
				name: "reuse_window",
				payload: func(bs *bstream) {
					bs.writeBit(zero)
					bs.writeBitsFast(delta>>dT, int(dS))
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = dL, dT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ delta)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
			{
				name: "new_window",
				payload: func(bs *bstream) {
					bs.writeBit(one)
					writeXOR2NewWindowPayload(bs, delta)
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = dL, dT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ delta)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, dL, it.leading)
					require.Equal(t, dT, it.trailing)
				},
			},
		}, (*xor2Iterator).decodeValueKnownNonZero)
	})
	// decodeNewLeadingTrailing: exercises the 11-bit header fast path, the
	// value-bits fast path (small sigbits), and full-width sigbits (encoded as 0).
	t.Run("decodeNewLeadingTrailing", func(t *testing.T) {
		smallD := uint64(0x000ABCDE000000)
		sL, sT, _ := xor2DeltaWindow(smallD)
		fullD := uint64(0xFEDCBA9876543211)
		fL, fT, _ := xor2DeltaWindow(fullD)
		runCases(t, []testCase{
			{
				name:    "small_sigbits",
				payload: func(bs *bstream) { writeXOR2NewWindowPayload(bs, smallD) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, sL, it.leading)
					require.Equal(t, sT, it.trailing)
					expected := math.Float64frombits(math.Float64bits(baseline) ^ smallD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
			{
				name:    "full_width_sigbits",
				payload: func(bs *bstream) { writeXOR2NewWindowPayload(bs, fullD) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, fL, it.leading)
					require.Equal(t, fT, it.trailing)
					expected := math.Float64frombits(math.Float64bits(baseline) ^ fullD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
		}, (*xor2Iterator).decodeNewLeadingTrailing)
	})
}

View File

@ -135,7 +135,9 @@ type Meta struct {
}
// ChunkFromSamples requires all samples to have the same type.
// TODO(krajorama): test with ST when chunk formats support it.
// It is not efficient and meant for testing purposes only.
// It scans the samples to determine whether any sample has ST set and
// creates a chunk accordingly.
func ChunkFromSamples(s []Sample) (Meta, error) {
return ChunkFromSamplesGeneric(SampleSlice(s))
}
@ -154,7 +156,17 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) {
}
sampleType := s.Get(0).Type()
c, err := chunkenc.NewEmptyChunk(sampleType.ChunkEncoding())
hasST := false
for i := range s.Len() {
if s.Get(i).ST() != 0 {
hasST = true
break
}
}
// Request storing ST in the chunk if available.
c, err := sampleType.NewChunk(hasST)
if err != nil {
return Meta{}, err
}

View File

@ -19,6 +19,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
)
@ -60,3 +61,35 @@ func TestWriterWithDefaultSegmentSize(t *testing.T) {
require.NoError(t, err)
require.Len(t, d, 1, "expected only one segment to be created to hold both chunks")
}
// TestChunkFromSamplesWithST verifies that ChunkFromSamples preserves
// per-sample start timestamps (ST) through a full encode/decode round trip.
func TestChunkFromSamplesWithST(t *testing.T) {
	// Create samples with explicit ST (start timestamp) values.
	samples := []Sample{
		sample{t: 10, f: 11, st: 5},
		sample{t: 20, f: 12, st: 15},
		sample{t: 30, f: 13, st: 25},
	}
	chk, err := ChunkFromSamples(samples)
	require.NoError(t, err)
	require.NotNil(t, chk.Chunk)
	// Verify MinTime and MaxTime.
	require.Equal(t, int64(10), chk.MinTime)
	require.Equal(t, int64(30), chk.MaxTime)
	// Iterate over the chunk and verify ST values are preserved.
	it := chk.Chunk.Iterator(nil)
	idx := 0
	for vt := it.Next(); vt != chunkenc.ValNone; vt = it.Next() {
		require.Equal(t, chunkenc.ValFloat, vt)
		ts, v := it.At()
		st := it.AtST()
		require.Equal(t, samples[idx].ST(), st, "ST mismatch at index %d", idx)
		require.Equal(t, samples[idx].T(), ts, "T mismatch at index %d", idx)
		require.Equal(t, samples[idx].F(), v, "F mismatch at index %d", idx)
		idx++
	}
	require.NoError(t, it.Err())
	require.Equal(t, len(samples), idx, "expected all samples to be iterated")
}

View File

@ -0,0 +1,130 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compression
import (
"errors"
"fmt"
"github.com/golang/snappy"
"github.com/klauspost/compress/zstd"
)
// Type represents the compression type used for encoding and decoding data.
type Type string

const (
	// None represents the no-compression case.
	// None is the default when Type is empty.
	None Type = "none"
	// Snappy represents the snappy block format.
	Snappy Type = "snappy"
	// Zstd represents zstd compression.
	Zstd Type = "zstd"
)
// Encoder provides compression encoding functionality for supported compression
// types. It is agnostic to the content being compressed, operating on byte
// slices of serialized data streams. The encoder maintains internal state for
// Zstd compression and can handle multiple compression types including None,
// Snappy, and Zstd.
type Encoder struct {
	// w is the reusable zstd writer; nil unless the Encoder was created via NewEncoder().
	w *zstd.Encoder
}
// NewEncoder creates a new Encoder. Returns an error if the zstd encoder cannot
// be initialized.
func NewEncoder() (*Encoder, error) {
	w, err := zstd.NewWriter(nil)
	if err != nil {
		return nil, err
	}
	return &Encoder{w: w}, nil
}
// Encode returns the encoded form of src for the given compression type. It also
// returns an indicator of whether the compression was performed. Encode may skip
// compressing for the None type, but also when src is too large, e.g. for the
// snappy block format.
//
// The buf is used as a buffer for the returned encoding, and it must not overlap
// with src. It is valid to pass a nil buf.
func (e *Encoder) Encode(t Type, src, buf []byte) (_ []byte, compressed bool, err error) {
	switch {
	case len(src) == 0, t == "", t == None:
		// Nothing to encode; return the input unchanged.
		return src, false, nil
	case t == Snappy:
		// If MaxEncodedLen is less than 0 the record is too large to be compressed.
		if snappy.MaxEncodedLen(len(src)) < 0 {
			return src, false, nil
		}
		// The snappy library uses `len` to calculate if we need a new buffer.
		// In order to allocate as few buffers as possible make the length
		// equal to the capacity.
		buf = buf[:cap(buf)]
		return snappy.Encode(buf, src), true, nil
	case t == Zstd:
		// Guard against both a nil receiver and a zero-value Encoder that was
		// not created via NewEncoder(); in the latter case e.w is nil and
		// calling EncodeAll on it would panic.
		if e == nil || e.w == nil {
			return nil, false, errors.New("zstd requested but encoder was not initialized with NewEncoder()")
		}
		return e.w.EncodeAll(src, buf[:0]), true, nil
	default:
		return nil, false, fmt.Errorf("unsupported compression type: %s", t)
	}
}
// Decoder provides decompression functionality for supported compression types.
// It is agnostic to the content being decompressed, operating on byte slices of
// serialized data streams. The decoder maintains internal state for Zstd
// decompression and can handle multiple compression types including None,
// Snappy, and Zstd.
type Decoder struct {
	// r is the reusable zstd reader; nil unless the Decoder was created via NewDecoder().
	r *zstd.Decoder
}
// NewDecoder creates a new Decoder.
func NewDecoder() *Decoder {
	// Calling zstd.NewReader with a nil io.Reader and no options cannot return an error.
	r, _ := zstd.NewReader(nil)
	return &Decoder{r: r}
}
// Decode returns the decoded form of src or an error, given the expected
// compression type.
//
// The buf is used as a buffer for the returned decoded entry, and it must not
// overlap with src. It is valid to pass a nil buf.
func (d *Decoder) Decode(t Type, src, buf []byte) (_ []byte, err error) {
	switch {
	case len(src) == 0, t == "", t == None:
		// Nothing to decode; return the input unchanged.
		return src, nil
	case t == Snappy:
		// The snappy library uses `len` to calculate if we need a new buffer.
		// In order to allocate as few buffers as possible make the length
		// equal to the capacity.
		buf = buf[:cap(buf)]
		return snappy.Decode(buf, src)
	case t == Zstd:
		// Guard against both a nil receiver and a zero-value Decoder that was
		// not created via NewDecoder(); in the latter case d.r is nil and
		// calling DecodeAll on it would panic.
		if d == nil || d.r == nil {
			return nil, errors.New("zstd requested but Decoder was not initialized with NewDecoder()")
		}
		return d.r.DecodeAll(src, buf[:0])
	default:
		return nil, fmt.Errorf("unsupported compression type: %s", t)
	}
}

View File

@ -240,6 +240,11 @@ type Options struct {
// is implemented.
EnableSTAsZeroSample bool
// EnableXOR2Encoding enables the XOR2 chunk encoding for float samples.
// XOR2 provides better compression than XOR, especially for stale markers.
// Automatically set to true when EnableSTStorage is true.
EnableXOR2Encoding bool
// EnableSTStorage determines whether TSDB should write a Start Timestamp (ST)
// per sample to WAL.
// TODO(bwplotka): Implement this option as per PROM-60, currently it's noop.
@ -868,6 +873,8 @@ func Open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, st
opts.FeatureRegistry.Set(features.TSDB, "isolation", !opts.IsolationDisabled)
opts.FeatureRegistry.Set(features.TSDB, "use_uncached_io", opts.UseUncachedIO)
opts.FeatureRegistry.Enable(features.TSDB, "native_histograms")
opts.FeatureRegistry.Set(features.TSDB, "st_storage", opts.EnableSTStorage)
opts.FeatureRegistry.Set(features.TSDB, "xor2_encoding", opts.EnableXOR2Encoding)
}
return open(dir, l, r, opts, rngs, stats)
@ -1074,6 +1081,8 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn
headOpts.OutOfOrderCapMax.Store(opts.OutOfOrderCapMax)
headOpts.EnableSharding = opts.EnableSharding
headOpts.EnableSTAsZeroSample = opts.EnableSTAsZeroSample
headOpts.EnableSTStorage.Store(opts.EnableSTStorage)
headOpts.EnableXOR2Encoding.Store(opts.EnableXOR2Encoding)
headOpts.EnableMetadataWALRecords = opts.EnableMetadataWALRecords
if opts.WALReplayConcurrency > 0 {
headOpts.WALReplayConcurrency = opts.WALReplayConcurrency
@ -1277,18 +1286,12 @@ func (db *DB) ApplyConfig(conf *config.Config) error {
// Update retention configuration if provided.
if conf.StorageConfig.TSDBConfig.Retention != nil {
db.retentionMtx.Lock()
if conf.StorageConfig.TSDBConfig.Retention.Time > 0 {
db.opts.RetentionDuration = int64(conf.StorageConfig.TSDBConfig.Retention.Time)
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
}
if conf.StorageConfig.TSDBConfig.Retention.Size > 0 {
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
}
if conf.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
}
db.opts.RetentionDuration = int64(time.Duration(conf.StorageConfig.TSDBConfig.Retention.Time) / time.Millisecond)
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
db.retentionMtx.Unlock()
}
} else {

View File

@ -193,7 +193,7 @@ func TestDataNotAvailableAfterRollback_AppendV2(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -968,16 +968,18 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries_AppendV2(t *testing.T) {
// We test both with few and many samples appended after series creation. If samples are < 120 then there's no
// mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL.
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation)
})
for _, enableSTStorage := range []bool{false, true} {
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage = %v", numSamplesAfterSeriesCreation, run, enableSTStorage), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableSTStorage)
})
}
}
}
}
func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) {
func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableSTStorage bool) {
const numSeries = 1000
db := newTestDB(t)
@ -985,7 +987,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSampl
for seriesRef := 1; seriesRef <= numSeries; seriesRef++ {
// Log samples before the series is logged to the WAL.
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: enableSTStorage}
var samples []record.RefSample
for ts := range numSamplesBeforeSeriesCreation {
@ -1176,139 +1178,143 @@ func TestTombstoneCleanResultEmptyBlock_AppendV2(t *testing.T) {
func TestSizeRetention_AppendV2(t *testing.T) {
t.Parallel()
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
// Add some data to the WAL.
headApp := db.Head().AppenderV2(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{})
// Add some data to the WAL.
headApp := db.Head().AppenderV2(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableSTStorage)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().AppenderV2(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
})
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().AppenderV2(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
}
func TestNotMatcherSelectsLabelsUnsetSeries_AppendV2(t *testing.T) {
@ -1499,33 +1505,36 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) {
require.Equal(t, int64(1000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("wal-only", func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
for _, enableSTStorage := range []bool{false, true} {
t.Run("wal-only,stStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
}
t.Run("existing-block", func(t *testing.T) {
dir := t.TempDir()
@ -1537,37 +1546,39 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) {
require.Equal(t, int64(2000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("existing-block-and-wal", func(t *testing.T) {
dir := t.TempDir()
for _, enableSTStorage := range []bool{false, true} {
t.Run("existing-block-and-wal,stStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
db := newTestDB(t, withDir(dir))
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
}
}
func TestNoEmptyBlocks_AppendV2(t *testing.T) {
@ -3265,7 +3276,7 @@ func testOOOWALWriteAppendV2(t *testing.T,
series, err := dec.Series(rec, nil)
require.NoError(t, err)
records = append(records, series)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
records = append(records, samples)
@ -3422,112 +3433,116 @@ func TestMetadataInWAL_AppenderV2(t *testing.T) {
}
func TestMetadataCheckpointingOnlyKeepsLatestEntry_AppendV2(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
hb.opts.EnableMetadataWALRecords = true
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
hb.opts.EnableMetadataWALRecords = true
// Add some series so we can append metadata to them.
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
// Add some series so we can append metadata to them.
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app := hb.AppenderV2(ctx)
ts := int64(0)
_, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1})
require.NoError(t, err)
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
_, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3})
require.NoError(t, err)
_, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4})
require.NoError(t, err)
require.NoError(t, app.Commit())
app := hb.AppenderV2(ctx)
ts := int64(0)
_, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1})
require.NoError(t, err)
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
_, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3})
require.NoError(t, err)
_, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableSTStorage)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
})
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
}
func TestMetadataAssertInMemoryData_AppendV2(t *testing.T) {
@ -7489,6 +7504,65 @@ func TestAbortBlockCompactions_AppendV2(t *testing.T) {
require.Equal(t, 4, compactions, "expected 4 compactions to be completed")
}
// TestCompactHeadWithSTStorage_AppendV2 ensures that when EnableSTStorage is true,
// compacted blocks contain chunks with EncXOR2 encoding for float samples.
func TestCompactHeadWithSTStorage_AppendV2(t *testing.T) {
	t.Parallel()

	// Two-hour block ranges with ST storage and XOR2 encoding enabled.
	dbOpts := &Options{
		RetentionDuration:  int64(time.Hour * 24 * 15 / time.Millisecond),
		NoLockfile:         true,
		MinBlockDuration:   int64(time.Hour * 2 / time.Millisecond),
		MaxBlockDuration:   int64(time.Hour * 2 / time.Millisecond),
		WALCompression:     compression.Snappy,
		EnableSTStorage:    true,
		EnableXOR2Encoding: true,
	}
	db := newTestDB(t, withOpts(dbOpts))
	ctx := context.Background()

	// Append 100 float samples (ts 100..199), all carrying ST=50, to one series.
	const (
		mint = 100
		maxt = 200
	)
	app := db.AppenderV2(ctx)
	for ts := mint; ts < maxt; ts++ {
		_, err := app.Append(0, labels.FromStrings("a", "b"), 50, int64(ts), float64(ts), nil, nil, storage.AOptions{})
		require.NoError(t, err)
	}
	require.NoError(t, app.Commit())

	// Compact the head into a persisted block; exactly one block should exist.
	require.NoError(t, db.CompactHead(NewRangeHead(db.Head(), int64(mint), int64(maxt)-1)))
	require.Len(t, db.Blocks(), 1)
	block := db.Blocks()[0]

	chunkReader, err := block.Chunks()
	require.NoError(t, err)
	defer chunkReader.Close()
	indexReader, err := block.Index()
	require.NoError(t, err)
	defer indexReader.Close()

	postings, err := indexReader.Postings(ctx, "a", "b")
	require.NoError(t, err)

	// Every chunk of the matched series must use the XOR2 encoding.
	var (
		builder   labels.ScratchBuilder
		chks      []chunks.Meta
		numChunks int
	)
	for postings.Next() {
		chks = chks[:0]
		require.NoError(t, indexReader.Series(postings.At(), &builder, &chks))
		for _, meta := range chks {
			c, _, err := chunkReader.ChunkOrIterable(meta)
			require.NoError(t, err)
			require.Equal(t, chunkenc.EncXOR2, c.Encoding(),
				"unexpected chunk encoding, got %s", c.Encoding())
			numChunks++
		}
	}
	require.NoError(t, postings.Err())
	require.Positive(t, numChunks, "expected at least one chunk")
}
func TestNewCompactorFunc_AppendV2(t *testing.T) {
opts := DefaultOptions()
block1 := ulid.MustNew(1, nil)
@ -7520,3 +7594,111 @@ func TestNewCompactorFunc_AppendV2(t *testing.T) {
require.Len(t, ulids, 1)
require.Equal(t, block2, ulids[0])
}
// TestDBAppenderV2_STStorage_OutOfOrder verifies that ST storage works correctly
// when samples are appended out of order and can be queried using ChunkQuerier.
func TestDBAppenderV2_STStorage_OutOfOrder(t *testing.T) {
	h := tsdbutil.GenerateTestHistogram(1)
	h.CounterResetHint = histogram.NotCounterReset

	for _, tt := range []struct {
		name     string
		appended []chunks.Sample
		want     []chunks.Sample
	}{
		{
			name: "Float samples out of order",
			appended: []chunks.Sample{
				newSample(20, 200, 2.0, nil, nil), // Append second sample first.
				newSample(10, 100, 1.0, nil, nil), // Append first sample second (OOO).
				newSample(30, 300, 3.0, nil, nil), // Append third sample last.
				newSample(25, 250, 2.5, nil, nil), // Append middle sample (OOO).
			},
			want: []chunks.Sample{
				newSample(10, 100, 1.0, nil, nil),
				newSample(20, 200, 2.0, nil, nil),
				newSample(25, 250, 2.5, nil, nil),
				newSample(30, 300, 3.0, nil, nil),
			},
		},
		{
			name: "Histogram samples out of order",
			appended: []chunks.Sample{
				newSample(30, 300, 0, h, nil), // Append third sample first.
				newSample(10, 100, 0, h, nil), // Append first sample second (OOO).
				newSample(20, 200, 0, h, nil), // Append second sample last (OOO).
			},
			// Histograms don't support ST storage yet, should return 0 for ST.
			want: []chunks.Sample{
				newSample(0, 100, 0, h, nil),
				newSample(0, 200, 0, h, nil),
				newSample(0, 300, 0, h, nil),
			},
		},
		{
			name: "Mixed float samples with same ST",
			appended: []chunks.Sample{
				newSample(10, 200, 2.0, nil, nil),
				newSample(10, 100, 1.0, nil, nil), // OOO with same ST.
				newSample(10, 300, 3.0, nil, nil),
			},
			want: []chunks.Sample{
				newSample(10, 100, 1.0, nil, nil),
				newSample(10, 200, 2.0, nil, nil),
				newSample(10, 300, 3.0, nil, nil),
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			opts := DefaultOptions()
			opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds()
			opts.EnableSTStorage = true
			opts.EnableXOR2Encoding = true
			db := newTestDB(t, withOpts(opts))
			db.DisableCompactions()

			lbls := labels.FromStrings("foo", "bar")
			// Each sample goes through its own appender+commit so out-of-order
			// handling is exercised across commits.
			for _, s := range tt.appended {
				app := db.AppenderV2(context.Background())
				_, err := app.Append(0, lbls, s.ST(), s.T(), s.F(), s.H(), s.FH(), storage.AOptions{})
				require.NoError(t, err, "Appending OOO sample with ST should succeed")
				require.NoError(t, app.Commit(), "Committing OOO sample with ST should succeed")
			}

			querier, err := db.ChunkQuerier(math.MinInt64, math.MaxInt64)
			require.NoError(t, err)
			defer querier.Close()

			set := querier.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.True(t, set.Next(), "Should have series")
			series := set.At()
			require.NoError(t, set.Err())
			require.False(t, set.Next(), "Should have only one series")

			// Expand all chunks of the single series into a flat sample slice.
			var got []chunks.Sample
			chunkIt := series.Iterator(nil)
			for chunkIt.Next() {
				expanded, err := storage.ExpandSamples(chunkIt.At().Chunk.Iterator(nil), newSample)
				require.NoError(t, err)
				got = append(got, expanded...)
			}
			require.NoError(t, chunkIt.Err())

			// Use requireEqualSamplesIgnoreCounterResets to ignore histogram counter reset hints.
			requireEqualSamples(t, lbls.String(), tt.want, got, requireEqualSamplesIgnoreCounterResets)

			// Additionally verify ST values match expectations.
			require.Len(t, got, len(tt.want))
			for i, want := range tt.want {
				require.Equal(t, want.ST(), got[i].ST(), "Sample %d: ST should match", i)
			}
		})
	}
}

View File

@ -395,7 +395,7 @@ func TestDataNotAvailableAfterRollback(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -1170,24 +1170,25 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T) {
// We test both with few and many samples appended after series creation. If samples are < 120 then there's no
// mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL.
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation)
})
for _, enableSTStorage := range []bool{false, true} {
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage=%v", numSamplesAfterSeriesCreation, run, enableSTStorage), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableSTStorage)
})
}
}
}
}
func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) {
func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableSTStorage bool) {
const numSeries = 1000
db := newTestDB(t)
db.DisableCompactions()
for seriesRef := 1; seriesRef <= numSeries; seriesRef++ {
// Log samples before the series is logged to the WAL.
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: enableSTStorage}
var samples []record.RefSample
for ts := range numSamplesBeforeSeriesCreation {
@ -1551,139 +1552,143 @@ func TestRetentionDurationMetric(t *testing.T) {
func TestSizeRetention(t *testing.T) {
t.Parallel()
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
// Add some data to the WAL.
headApp := db.Head().Appender(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), tim, v)
// Add some data to the WAL.
headApp := db.Head().Appender(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), tim, v)
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableSTStorage)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().Appender(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, ts, float64(ts))
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
})
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().Appender(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, ts, float64(ts))
require.NoError(t, err)
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
}
func TestSizeRetentionMetric(t *testing.T) {
@ -1743,7 +1748,7 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
StorageConfig: config.StorageConfig{
TSDBConfig: &config.TSDBConfig{
Retention: &config.TSDBRetentionConfig{
Time: model.Duration(shorterRetentionDuration),
Time: model.Duration(time.Duration(shorterRetentionDuration) * time.Millisecond),
},
},
},
@ -1772,6 +1777,31 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
require.Positive(t, int(prom_testutil.ToFloat64(db.metrics.timeRetentionCount)), "time retention count should be incremented")
}
// TestApplyConfigRetentionDurationMetricUnit verifies that after a config
// reload the prometheus_tsdb_retention_limit_seconds metric reports the
// retention in seconds.
func TestApplyConfigRetentionDurationMetricUnit(t *testing.T) {
	retentionMs := time.Hour.Milliseconds()
	db := newTestDB(t, withOpts(&Options{RetentionDuration: retentionMs}))

	newCfg := &config.Config{
		StorageConfig: config.StorageConfig{
			TSDBConfig: &config.TSDBConfig{
				Retention: &config.TSDBRetentionConfig{
					Time: model.Duration(time.Hour),
				},
			},
		},
	}
	require.NoError(t, db.ApplyConfig(newCfg))

	// The internally tracked retention stays in milliseconds...
	require.Equal(t, retentionMs, db.getRetentionDuration())
	// ...while the exported metric reports seconds.
	require.Equal(t, time.Hour.Seconds(), prom_testutil.ToFloat64(db.metrics.retentionDuration))
}
func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) {
db := newTestDB(t)
@ -2072,33 +2102,36 @@ func TestInitializeHeadTimestamp(t *testing.T) {
require.Equal(t, int64(1000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("wal-only", func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
for _, enableSTStorage := range []bool{false, true} {
t.Run("wal-only-st-"+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
}
t.Run("existing-block", func(t *testing.T) {
dir := t.TempDir()
@ -2110,37 +2143,40 @@ func TestInitializeHeadTimestamp(t *testing.T) {
require.Equal(t, int64(2000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("existing-block-and-wal", func(t *testing.T) {
dir := t.TempDir()
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
for _, enableSTStorage := range []bool{false, true} {
t.Run("existing-block-and-wal,enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
}
}
func TestNoEmptyBlocks(t *testing.T) {
@ -4523,7 +4559,7 @@ func testOOOWALWrite(t *testing.T,
series, err := dec.Series(rec, nil)
require.NoError(t, err)
records = append(records, series)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
records = append(records, samples)
@ -4684,102 +4720,106 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) {
require.NoError(t, err)
}
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
// Add some series so we can append metadata to them.
app := hb.Appender(ctx)
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
// Add some series so we can append metadata to them.
app := hb.Appender(ctx)
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
for _, s := range []labels.Labels{s1, s2, s3, s4} {
_, err := app.Append(0, s, 0, 0)
require.NoError(t, err)
for _, s := range []labels.Labels{s1, s2, s3, s4} {
_, err := app.Append(0, s, 0, 0)
require.NoError(t, err)
}
require.NoError(t, app.Commit())
// Add a first round of metadata to the first three series.
// Re-take the Appender, as the previous Commit will have it closed.
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m1)
updateMetadata(t, app, s2, m2)
updateMetadata(t, app, s3, m3)
updateMetadata(t, app, s4, m4)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m5)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableSTStorage)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
})
}
require.NoError(t, app.Commit())
// Add a first round of metadata to the first three series.
// Re-take the Appender, as the previous Commit will have it closed.
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m1)
updateMetadata(t, app, s2, m2)
updateMetadata(t, app, s3, m3)
updateMetadata(t, app, s4, m4)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m5)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
}
func TestMetadataAssertInMemoryData(t *testing.T) {

View File

@ -65,6 +65,96 @@ Notes:
* `padding` of 0 to 7 bits so that the whole chunk data is byte-aligned.
* The chunk can have as few as one sample, i.e. `ts_1`, `v_1`, etc. are optional.
## XOR2 chunk data
XOR2 uses the same structure as XOR for samples 0 and 1. Starting from sample 2,
a joint control prefix encodes both the timestamp delta-of-delta (dod) and whether
the value changed, with common dod cases byte-aligned for efficient writing.
XOR2 can also optionally encode the start timestamp (ST); see details further
down.
```
┌──────────────────────┬───────────────────┬───────────────┬───────────────┬────────────────┬─-
│ num_samples <uint16> │ st_header <uint8> | ts_0 <varint> │ v_0 <float64> │ ?st_0 <varint> |
└──────────────────────┴───────────────────┴───────────────┴───────────────┴────────────────┴─-
-─────────────────────┬───────────────────────┬─────────────────────────┬─-
ts_1_delta <uvarint> │ v_1_xor <varbit_xor2> │ ?st_1_delta <varbit_ts> |
-─────────────────────┴───────────────────────┴─────────────────────────┴─-
-─────────────────────────┬───────────────────────┬─────┬─-
sample_2 <joint_sample2> │ ?st_2_dod <varbit_ts> | ... │
-─────────────────────────┴───────────────────────┴─────┴─-
-─────────────────────────┬───────────────────────┬──────────────────┐
sample_n <joint_sample2> │ ?st_n_dod <varbit_ts> | padding <x bits>
-─────────────────────────┴───────────────────────┴──────────────────┘
```
### Joint sample encoding for n >= 2 (`<joint_sample2>`):
Each sample starts with a variable-length control prefix that jointly encodes the
dod and value change status:
| Control prefix | dod | Value encoding that follows |
|---|---|---|
| `0` | 0 | (none, value unchanged) |
| `10` | 0 | `<varbit_xor2_nn>` (value known non-zero and non-stale) |
| `110DDDDD` `DDDDDDDD` | 13-bit signed [-4096, 4095] | `<varbit_xor2>` |
| `1110DDDD` `DDDDDDDD` `DDDDDDDD` | 20-bit signed [-524288, 524287] | `<varbit_xor2>` |
| `11110` + 64-bit dod | exact | `<varbit_xor2>` |
| `11111` | 0 | (none, stale NaN — no value field) |
The `110` and `1110` cases pack the prefix and the most-significant dod bits into
the first byte, making the full dod field byte-aligned.
### Value delta encoding (`<varbit_xor2>`):
Used after the dod≠0 control prefixes. The XOR of the current and previous value is encoded as:
| Prefix | Meaning |
|---|---|
| `0` | XOR = 0 (value unchanged) |
| `10` | Reuse previous leading/trailing window; `sigbits` value bits follow |
| `110` + leading(5) + sigbits(6) + value(sigbits) | New leading/trailing window |
| `111` | Stale NaN marker (3 bits) |
### Value delta encoding, known non-zero (`<varbit_xor2_nn>`):
Used after the `10` control prefix (dod=0, value known to have changed and be non-stale).
The delta=0 check is skipped, saving one bit on the reuse path:
| Prefix | Meaning |
|---|---|
| `0` | Reuse previous leading/trailing window; `sigbits` value bits follow |
| `1` + leading(5) + sigbits(6) + value(sigbits) | New leading/trailing window |
### Start timestamp encoding
* We use `st_i_dod` and `st_i` interchangeably when `i>1` in these notes.
* `st_header` is one byte:
```
┌───────────────────────┬───────────────────────┐
│ first_st_known<1 bit> │ st_changed_on<7 bits> │
└───────────────────────┴───────────────────────┘
```
where the highest bit `first_st_known` indicates if `st_0` is present or not.
If the lower 7 bits `st_changed_on` are 0, no `st_i (i>0)` is present.
Otherwise `st_i (i>=st_changed_on)` is present, while
`st_i (0<i<st_changed_on)` is not present.
Due to the 7-bit limitation, once a chunk has at least 127 samples,
`st_changed_on` is set to 127 (0x7F) and the 127th and further samples will
have `st_i` present.
* `st_0` is encoded as a `varint` if present.
* `st_1` is encoded as a `varbit_ts` delta from `st_0` (or from 0 if `st_0` is
not present).
* `st_i_dod` aka `st_i (i>1)` is encoded as a `varbit_ts` "delta of delta" from
`st_i-1` (or from 0 if `st_i-1` is not present).
## Histogram chunk data
```

View File

@ -161,6 +161,15 @@ type HeadOptions struct {
OutOfOrderTimeWindow atomic.Int64
OutOfOrderCapMax atomic.Int64
// EnableSTStorage determines whether databases (WAL/WBL, tsdb,
// agent) should set a Start Time value per sample.
// Represents 'st-storage' feature flag.
EnableSTStorage atomic.Bool
// EnableXOR2Encoding enables XOR2 chunk encoding for float samples.
// Represents 'xor2-encoding' feature flag.
EnableXOR2Encoding atomic.Bool
ChunkRange int64
// ChunkDirRoot is the parent directory of the chunks directory.
ChunkDirRoot string
@ -1382,7 +1391,7 @@ func (h *Head) truncateWAL(mint int64) error {
}
h.metrics.checkpointCreationTotal.Inc()
if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint); err != nil {
if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint, h.opts.EnableSTStorage.Load()); err != nil {
h.metrics.checkpointCreationFail.Inc()
var cerr *chunks.CorruptionErr
if errors.As(err, &cerr) {
@ -1676,7 +1685,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match
}
if h.wal != nil {
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage.Load()}
if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil {
return err
}

View File

@ -185,6 +185,8 @@ func (h *Head) appender() *headAppender {
typesInBatch: h.getTypeMap(),
appendID: appendID,
cleanupAppendIDsBelow: cleanupAppendIDsBelow,
storeST: h.opts.EnableSTStorage.Load(),
useXOR2: h.opts.EnableXOR2Encoding.Load(),
},
}
}
@ -412,6 +414,8 @@ type headAppenderBase struct {
appendID, cleanupAppendIDsBelow uint64
closed bool
storeST bool
useXOR2 bool
}
type headAppender struct {
headAppenderBase
@ -1059,7 +1063,7 @@ func (a *headAppenderBase) log() error {
defer func() { a.head.putBytesBuffer(buf) }()
var rec []byte
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: a.storeST}
if len(a.seriesRefs) > 0 {
rec = enc.Series(a.seriesRefs, buf)
@ -1168,6 +1172,7 @@ type appenderCommitContext struct {
histoOOBRejected int
inOrderMint int64
inOrderMaxt int64
appendChunkOpts chunkOpts
oooMinT int64
oooMaxT int64
wblSamples []record.RefSample
@ -1177,8 +1182,7 @@ type appenderCommitContext struct {
oooMmapMarkersCount int
oooRecords [][]byte
oooCapMax int64
appendChunkOpts chunkOpts
enc record.Encoder
oooEnc record.Encoder
}
// commitExemplars adds all exemplars from the provided batch to the head's exemplar storage.
@ -1228,31 +1232,31 @@ func (acc *appenderCommitContext) collectOOORecords(a *headAppenderBase) {
})
}
}
r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer())
r := acc.oooEnc.MmapMarkers(markers, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
if len(acc.wblSamples) > 0 {
r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer())
r := acc.oooEnc.Samples(acc.wblSamples, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
if len(acc.wblHistograms) > 0 {
r, customBucketsHistograms := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer())
r, customBucketsHistograms := acc.oooEnc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer())
if len(r) > 0 {
acc.oooRecords = append(acc.oooRecords, r)
}
if len(customBucketsHistograms) > 0 {
r := acc.enc.CustomBucketsHistogramSamples(customBucketsHistograms, a.head.getBytesBuffer())
r := acc.oooEnc.CustomBucketsHistogramSamples(customBucketsHistograms, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
}
if len(acc.wblFloatHistograms) > 0 {
r, customBucketsFloatHistograms := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer())
r, customBucketsFloatHistograms := acc.oooEnc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer())
if len(r) > 0 {
acc.oooRecords = append(acc.oooRecords, r)
}
if len(customBucketsFloatHistograms) > 0 {
r := acc.enc.CustomBucketsFloatHistogramSamples(customBucketsFloatHistograms, a.head.getBytesBuffer())
r := acc.oooEnc.CustomBucketsFloatHistogramSamples(customBucketsFloatHistograms, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
}
@ -1387,7 +1391,7 @@ func (a *headAppenderBase) commitFloats(b *appendBatch, acc *appenderCommitConte
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
ok, chunkCreated, mmapRefs = series.insert(s.ST, s.T, s.V, nil, nil, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1431,7 +1435,7 @@ func (a *headAppenderBase) commitFloats(b *appendBatch, acc *appenderCommitConte
default:
newlyStale := !value.IsStaleNaN(series.lastValue) && value.IsStaleNaN(s.V)
staleToNonStale := value.IsStaleNaN(series.lastValue) && !value.IsStaleNaN(s.V)
ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts)
ok, chunkCreated = series.append(s.ST, s.T, s.V, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1492,7 +1496,8 @@ func (a *headAppenderBase) commitHistograms(b *appendBatch, acc *appenderCommitC
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
// TODO(krajorama,ywwg): Pass ST when available in WAL.
ok, chunkCreated, mmapRefs = series.insert(0, s.T, 0, s.H, nil, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1540,7 +1545,8 @@ func (a *headAppenderBase) commitHistograms(b *appendBatch, acc *appenderCommitC
newlyStale = newlyStale && !value.IsStaleNaN(series.lastHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(series.lastHistogramValue.Sum) && !value.IsStaleNaN(s.H.Sum)
}
ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts)
// TODO(krajorama,ywwg): pass ST when available in WAL.
ok, chunkCreated = series.appendHistogram(0, s.T, s.H, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1601,7 +1607,8 @@ func (a *headAppenderBase) commitFloatHistograms(b *appendBatch, acc *appenderCo
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
// TODO(krajorama,ywwg): Pass ST when available in WAL.
ok, chunkCreated, mmapRefs = series.insert(0, s.T, 0, nil, s.FH, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1649,7 +1656,8 @@ func (a *headAppenderBase) commitFloatHistograms(b *appendBatch, acc *appenderCo
newlyStale = newlyStale && !value.IsStaleNaN(series.lastFloatHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(series.lastFloatHistogramValue.Sum) && !value.IsStaleNaN(s.FH.Sum)
}
ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts)
// TODO(krajorama,ywwg): pass ST when available in WAL.
ok, chunkCreated = series.appendFloatHistogram(0, s.T, s.FH, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1741,6 +1749,10 @@ func (a *headAppenderBase) Commit() (err error) {
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: a.useXOR2,
},
oooEnc: record.Encoder{
EnableSTStorage: a.storeST,
},
}
@ -1796,18 +1808,18 @@ func (a *headAppenderBase) Commit() (err error) {
}
// insert is like append, except it inserts. Used for OOO samples.
func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
func (s *memSeries) insert(st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, o chunkOpts, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
if s.ooo == nil {
s.ooo = &memSeriesOOOFields{}
}
c := s.ooo.oooHeadChunk
if c == nil || c.chunk.NumSamples() == int(oooCapMax) {
// Note: If no new samples come in then we rely on compaction to clean up stale in-memory OOO chunks.
c, mmapRefs = s.cutNewOOOHeadChunk(t, chunkDiskMapper, logger)
c, mmapRefs = s.cutNewOOOHeadChunk(t, o, logger)
chunkCreated = true
}
ok := c.chunk.Insert(t, v, h, fh)
ok := c.chunk.Insert(st, t, v, h, fh)
if ok {
if chunkCreated || t < c.minTime {
c.minTime = t
@ -1824,19 +1836,19 @@ type chunkOpts struct {
chunkDiskMapper *chunks.ChunkDiskMapper
chunkRange int64
samplesPerChunk int
useXOR2 bool // Selects XOR2 encoding for float chunks.
}
// append adds the sample (t, v) to the series. The caller also has to provide
// the appendID for isolation. (The appendID can be zero, which results in no
// isolation for this append.)
// Series lock must be held when calling.
func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.EncXOR, o)
func (s *memSeries) append(st, t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.ValFloat.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
// TODO(krajorama): pass ST.
s.app.Append(0, t, v)
s.app.Append(st, t, v)
c.maxTime = t
@ -1856,14 +1868,14 @@ func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sa
// In case of recoding the existing chunk, a new chunk is allocated and the old chunk is dropped.
// To keep the meaning of prometheus_tsdb_head_chunks and prometheus_tsdb_head_chunks_created_total
// consistent, we return chunkCreated=false in this case.
func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
func (s *memSeries) appendHistogram(st, t int64, h *histogram.Histogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
// Head controls the execution of recoding, so that we own the proper
// chunk reference afterwards and mmap used up chunks.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevApp, _ := s.app.(*chunkenc.HistogramAppender)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.EncHistogram, o)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.ValHistogram.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
@ -1878,8 +1890,7 @@ func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID ui
prevApp = nil
}
// TODO(krajorama): pass ST.
newChunk, recoded, s.app, _ = s.app.AppendHistogram(prevApp, 0, t, h, false) // false=request a new chunk if needed
newChunk, recoded, s.app, _ = s.app.AppendHistogram(prevApp, st, t, h, false) // false=request a new chunk if needed
s.lastHistogramValue = h
s.lastFloatHistogramValue = nil
@ -1914,14 +1925,14 @@ func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID ui
// In case of recoding the existing chunk, a new chunk is allocated and the old chunk is dropped.
// To keep the meaning of prometheus_tsdb_head_chunks and prometheus_tsdb_head_chunks_created_total
// consistent, we return chunkCreated=false in this case.
func (s *memSeries) appendFloatHistogram(t int64, fh *histogram.FloatHistogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
func (s *memSeries) appendFloatHistogram(st, t int64, fh *histogram.FloatHistogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
// Head controls the execution of recoding, so that we own the proper
// chunk reference afterwards and mmap used up chunks.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevApp, _ := s.app.(*chunkenc.FloatHistogramAppender)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.EncFloatHistogram, o)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.ValFloatHistogram.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
@ -1936,8 +1947,7 @@ func (s *memSeries) appendFloatHistogram(t int64, fh *histogram.FloatHistogram,
prevApp = nil
}
// TODO(krajorama): pass ST.
newChunk, recoded, s.app, _ = s.app.AppendFloatHistogram(prevApp, 0, t, fh, false) // False means request a new chunk if needed.
newChunk, recoded, s.app, _ = s.app.AppendFloatHistogram(prevApp, st, t, fh, false) // False means request a new chunk if needed.
s.lastHistogramValue = nil
s.lastFloatHistogramValue = fh
@ -2161,8 +2171,8 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange
// cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk.
// The caller must ensure that s is locked and s.ooo is not nil.
func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger)
func (s *memSeries) cutNewOOOHeadChunk(mint int64, o chunkOpts, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
ref := s.mmapCurrentOOOHeadChunk(o, logger)
s.ooo.oooHeadChunk = &oooHeadChunk{
chunk: NewOOOChunk(),
@ -2174,12 +2184,12 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk
}
// s must be locked when calling.
func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) []chunks.ChunkDiskMapperRef {
func (s *memSeries) mmapCurrentOOOHeadChunk(o chunkOpts, logger *slog.Logger) []chunks.ChunkDiskMapperRef {
if s.ooo == nil || s.ooo.oooHeadChunk == nil {
// OOO is not enabled or there is no head chunk, so nothing to m-map here.
return nil
}
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, o.useXOR2)
if err != nil {
handleChunkWriteError(err)
return nil
@ -2190,7 +2200,7 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMap
logger.Error("Too many OOO chunks, dropping data", "series", s.lset.String())
break
}
chunkRef := chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
chunkRef := o.chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
chunkRefs = append(chunkRefs, chunkRef)
s.ooo.oooMmappedChunks = append(s.ooo.oooMmappedChunks, &mmappedChunk{
ref: chunkRef,

View File

@ -95,6 +95,8 @@ func (h *Head) appenderV2() *headAppenderV2 {
typesInBatch: h.getTypeMap(),
appendID: appendID,
cleanupAppendIDsBelow: cleanupAppendIDsBelow,
storeST: h.opts.EnableSTStorage.Load(),
useXOR2: h.opts.EnableXOR2Encoding.Load(),
},
}
}
@ -140,7 +142,6 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
}
}
// TODO(bwplotka): Handle ST natively (as per PROM-60).
if a.head.opts.EnableSTAsZeroSample && st != 0 {
a.bestEffortAppendSTZeroSample(s, ls, st, t, h, fh)
}
@ -177,7 +178,7 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
// we do not need to check for the difference between "unknown
// series" and "known series with stNone".
}
appErr = a.appendFloat(s, t, v, opts.RejectOutOfOrder)
appErr = a.appendFloat(s, st, t, v, opts.RejectOutOfOrder)
}
// Handle append error, if any.
if appErr != nil {
@ -218,7 +219,7 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
return storage.SeriesRef(s.ref), partialErr
}
func (a *headAppenderV2) appendFloat(s *memSeries, t int64, v float64, fastRejectOOO bool) error {
func (a *headAppenderV2) appendFloat(s *memSeries, st, t int64, v float64, fastRejectOOO bool) error {
s.Lock()
// TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise
// to skip that sample from the WAL and write only in the WBL.
@ -239,7 +240,7 @@ func (a *headAppenderV2) appendFloat(s *memSeries, t int64, v float64, fastRejec
}
b := a.getCurrentBatch(stFloat, s.ref)
b.floats = append(b.floats, record.RefSample{Ref: s.ref, T: t, V: v})
b.floats = append(b.floats, record.RefSample{Ref: s.ref, ST: st, T: t, V: v})
b.floatSeries = append(b.floatSeries, s)
return nil
}
@ -366,7 +367,7 @@ func (a *headAppenderV2) bestEffortAppendSTZeroSample(s *memSeries, ls labels.La
}
err = a.appendHistogram(s, st, zeroHistogram, true)
default:
err = a.appendFloat(s, st, 0, true)
err = a.appendFloat(s, 0, st, 0, true)
}
if err != nil {

View File

@ -1865,296 +1865,300 @@ func TestHistogramInWALAndMmapChunk_AppenderV2(t *testing.T) {
}
func TestChunkSnapshot_AppenderV2(t *testing.T) {
head, _ := newTestHead(t, 120*4, compression.None, false)
defer func() {
head.opts.EnableMemorySnapshotOnShutdown = false
require.NoError(t, head.Close())
}()
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
head, _ := newTestHead(t, 120*4, compression.None, false)
defer func() {
head.opts.EnableMemorySnapshotOnShutdown = false
require.NoError(t, head.Close())
}()
type ex struct {
seriesLabels labels.Labels
e exemplar.Exemplar
}
numSeries := 10
expSeries := make(map[string][]chunks.Sample)
expHist := make(map[string][]chunks.Sample)
expFloatHist := make(map[string][]chunks.Sample)
expTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
expExemplars := make([]ex, 0)
histograms := tsdbutil.GenerateTestGaugeHistograms(481)
floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481)
newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar {
e := ex{
seriesLabels: lbls,
e: exemplar.Exemplar{
Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())),
Value: rand.Float64(),
Ts: ts,
},
}
expExemplars = append(expExemplars, e)
return e.e
}
checkSamples := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
require.Equal(t, expSeries, series)
}
checkHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*"))
require.Equal(t, expHist, series)
}
checkFloatHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*"))
require.Equal(t, expFloatHist, series)
}
checkTombstones := func() {
tr, err := head.Tombstones()
require.NoError(t, err)
actTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error {
for _, itv := range itvs {
actTombstones[ref].Add(itv)
type ex struct {
seriesLabels labels.Labels
e exemplar.Exemplar
}
numSeries := 10
expSeries := make(map[string][]chunks.Sample)
expHist := make(map[string][]chunks.Sample)
expFloatHist := make(map[string][]chunks.Sample)
expTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
expExemplars := make([]ex, 0)
histograms := tsdbutil.GenerateTestGaugeHistograms(481)
floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481)
newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar {
e := ex{
seriesLabels: lbls,
e: exemplar.Exemplar{
Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())),
Value: rand.Float64(),
Ts: ts,
},
}
expExemplars = append(expExemplars, e)
return e.e
}
checkSamples := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
require.Equal(t, expSeries, series)
}
checkHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*"))
require.Equal(t, expHist, series)
}
checkFloatHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*"))
require.Equal(t, expFloatHist, series)
}
checkTombstones := func() {
tr, err := head.Tombstones()
require.NoError(t, err)
actTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error {
for _, itv := range itvs {
actTombstones[ref].Add(itv)
}
return nil
}))
require.Equal(t, expTombstones, actTombstones)
}
checkExemplars := func() {
actExemplars := make([]ex, 0, len(expExemplars))
err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error {
actExemplars = append(actExemplars, ex{
seriesLabels: seriesLabels,
e: e,
})
return nil
})
require.NoError(t, err)
// Verifies both existence of right exemplars and order of exemplars in the buffer.
testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})})
}
var (
wlast, woffset int
err error
)
closeHeadAndCheckSnapshot := func() {
require.NoError(t, head.Close())
_, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot)
require.NoError(t, err)
require.Equal(t, wlast, sidx)
require.Equal(t, woffset, soffset)
}
openHeadAndCheckReplay := func() {
w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None)
require.NoError(t, err)
head, err = NewHead(nil, nil, w, nil, head.opts, nil)
require.NoError(t, err)
require.NoError(t, head.Init(math.MinInt64))
checkSamples()
checkHistograms()
checkFloatHistograms()
checkTombstones()
checkExemplars()
}
{ // Initial data that goes into snapshot.
// Add some initial samples with >=1 m-map chunk.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(1); ts <= 240; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add some tombstones.
enc := record.Encoder{EnableSTStorage: enableSTStorage}
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 1234, Maxt: 2345},
{Mint: 3456, Maxt: 4567},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
// These references should be the ones used for the snapshot.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Creating snapshot and verifying it.
head.opts.EnableMemorySnapshotOnShutdown = true
closeHeadAndCheckSnapshot() // This will create a snapshot.
// Test the replay of snapshot.
openHeadAndCheckReplay()
}
{ // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk.
// Add more samples.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(241); ts <= 480; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add more tombstones.
enc := record.Encoder{EnableSTStorage: enableSTStorage}
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 12345, Maxt: 23456},
{Mint: 34567, Maxt: 45678},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
{
// Close Head and verify that new snapshot was not created.
head.opts.EnableMemorySnapshotOnShutdown = false
closeHeadAndCheckSnapshot() // This should not create a snapshot.
// Test the replay of snapshot, m-map chunks, and WAL.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
openHeadAndCheckReplay()
}
// Creating another snapshot should delete the older snapshot and replay still works fine.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Close Head and verify that new snapshot was created.
closeHeadAndCheckSnapshot()
// Verify that there is only 1 snapshot.
files, err := os.ReadDir(head.opts.ChunkDirRoot)
require.NoError(t, err)
snapshots := 0
for i := len(files) - 1; i >= 0; i-- {
fi := files[i]
if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) {
snapshots++
require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name())
}
}
require.Equal(t, 1, snapshots)
// Test the replay of snapshot.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
// Disabling exemplars to check that it does not hard fail replay
// https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870.
head.opts.EnableExemplarStorage = false
head.opts.MaxExemplars.Store(0)
expExemplars = expExemplars[:0]
openHeadAndCheckReplay()
require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal))
}
return nil
}))
require.Equal(t, expTombstones, actTombstones)
}
checkExemplars := func() {
actExemplars := make([]ex, 0, len(expExemplars))
err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error {
actExemplars = append(actExemplars, ex{
seriesLabels: seriesLabels,
e: e,
})
return nil
})
require.NoError(t, err)
// Verifies both existence of right exemplars and order of exemplars in the buffer.
testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})})
}
var (
wlast, woffset int
err error
)
closeHeadAndCheckSnapshot := func() {
require.NoError(t, head.Close())
_, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot)
require.NoError(t, err)
require.Equal(t, wlast, sidx)
require.Equal(t, woffset, soffset)
}
openHeadAndCheckReplay := func() {
w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None)
require.NoError(t, err)
head, err = NewHead(nil, nil, w, nil, head.opts, nil)
require.NoError(t, err)
require.NoError(t, head.Init(math.MinInt64))
checkSamples()
checkHistograms()
checkFloatHistograms()
checkTombstones()
checkExemplars()
}
{ // Initial data that goes into snapshot.
// Add some initial samples with >=1 m-map chunk.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(1); ts <= 240; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add some tombstones.
var enc record.Encoder
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 1234, Maxt: 2345},
{Mint: 3456, Maxt: 4567},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
// These references should be the ones used for the snapshot.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Creating snapshot and verifying it.
head.opts.EnableMemorySnapshotOnShutdown = true
closeHeadAndCheckSnapshot() // This will create a snapshot.
// Test the replay of snapshot.
openHeadAndCheckReplay()
}
{ // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk.
// Add more samples.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(241); ts <= 480; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add more tombstones.
var enc record.Encoder
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 12345, Maxt: 23456},
{Mint: 34567, Maxt: 45678},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
{
// Close Head and verify that new snapshot was not created.
head.opts.EnableMemorySnapshotOnShutdown = false
closeHeadAndCheckSnapshot() // This should not create a snapshot.
// Test the replay of snapshot, m-map chunks, and WAL.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
openHeadAndCheckReplay()
}
// Creating another snapshot should delete the older snapshot and replay still works fine.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Close Head and verify that new snapshot was created.
closeHeadAndCheckSnapshot()
// Verify that there is only 1 snapshot.
files, err := os.ReadDir(head.opts.ChunkDirRoot)
require.NoError(t, err)
snapshots := 0
for i := len(files) - 1; i >= 0; i-- {
fi := files[i]
if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) {
snapshots++
require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name())
}
}
require.Equal(t, 1, snapshots)
// Test the replay of snapshot.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
// Disabling exemplars to check that it does not hard fail replay
// https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870.
head.opts.EnableExemplarStorage = false
head.opts.MaxExemplars.Store(0)
expExemplars = expExemplars[:0]
openHeadAndCheckReplay()
require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal))
}
}
@ -2919,13 +2923,15 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot_AppenderV2(t *testing.T) {
// TestWBLReplay checks the replay at a low level.
func TestWBLReplay_AppenderV2(t *testing.T) {
for name, scenario := range sampleTypeScenarios {
t.Run(name, func(t *testing.T) {
testWBLReplayAppenderV2(t, scenario)
})
for _, enableSTstorage := range []bool{false, true} {
t.Run(fmt.Sprintf("%s/st-storage=%v", name, enableSTstorage), func(t *testing.T) {
testWBLReplayAppenderV2(t, scenario, enableSTstorage)
})
}
}
}
func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario, enableSTstorage bool) {
dir := t.TempDir()
wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, compression.Snappy)
require.NoError(t, err)
@ -2936,6 +2942,8 @@ func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
opts.ChunkRange = 1000
opts.ChunkDirRoot = dir
opts.OutOfOrderTimeWindow.Store(30 * time.Minute.Milliseconds())
opts.EnableSTStorage.Store(enableSTstorage)
opts.EnableXOR2Encoding.Store(enableSTstorage)
h, err := NewHead(nil, nil, wal, oooWlog, opts, nil)
require.NoError(t, err)
@ -2987,7 +2995,7 @@ func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
require.False(t, ok)
require.NotNil(t, ms)
chks, err := ms.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chks, err := ms.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, h.opts.EnableXOR2Encoding.Load())
require.NoError(t, err)
require.Len(t, chks, 1)
@ -4748,3 +4756,135 @@ func TestHeadAppenderV2_Append_HistogramStalenessConversionMetrics(t *testing.T)
})
}
}
// TestHeadAppenderV2_STStorage verifies that when EnableSTStorage is true,
// start timestamps are properly stored in chunks and returned by queries.
// This test uses AppenderV2 which has native ST support.
func TestHeadAppenderV2_STStorage(t *testing.T) {
	testHistogram := tsdbutil.GenerateTestHistogram(1)
	testHistogram.CounterResetHint = histogram.NotCounterReset

	// sampleData describes one appended sample: its start timestamp,
	// timestamp, and either a float value or an integer histogram.
	type sampleData struct {
		st      int64
		ts      int64
		fSample float64
		h       *histogram.Histogram
	}

	testCases := []struct {
		name        string
		samples     []sampleData
		expectedSTs []int64
		isHistogram bool
	}{
		{
			name: "Float samples with ST",
			samples: []sampleData{
				{st: 10, ts: 100, fSample: 1.0},
				{st: 20, ts: 200, fSample: 2.0},
				{st: 30, ts: 300, fSample: 3.0},
			},
			expectedSTs: []int64{10, 20, 30},
			isHistogram: false,
		},
		{
			name: "Float samples with varying ST",
			samples: []sampleData{
				{st: 5, ts: 100, fSample: 1.0},
				{st: 5, ts: 200, fSample: 2.0},
				{st: 150, ts: 300, fSample: 3.0},
			},
			expectedSTs: []int64{5, 5, 150},
			isHistogram: false,
		},
		{
			name: "Histogram samples",
			samples: []sampleData{
				{st: 10, ts: 100, h: testHistogram},
				{st: 20, ts: 200, h: testHistogram},
				{st: 30, ts: 300, h: testHistogram},
			},
			// Histograms don't support ST storage yet, should return 0.
			expectedSTs: []int64{0, 0, 0},
			isHistogram: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			opts := newTestHeadDefaultOptions(DefaultBlockDuration, false)
			opts.EnableSTStorage.Store(true)
			opts.EnableXOR2Encoding.Store(true)
			h, _ := newTestHeadWithOptions(t, compression.None, opts)

			lbls := labels.FromStrings("foo", "bar")

			// Append all samples for this case and commit them.
			app := h.AppenderV2(context.Background())
			for _, s := range tc.samples {
				_, err := app.Append(0, lbls, s.st, s.ts, s.fSample, s.h, nil, storage.AOptions{})
				require.NoError(t, err)
			}
			require.NoError(t, app.Commit())

			// Read the chunks back directly and collect the stored STs.
			ctx := context.Background()
			ir, err := h.Index()
			require.NoError(t, err)
			defer ir.Close()
			cr, err := h.Chunks()
			require.NoError(t, err)
			defer cr.Close()

			postings, err := ir.Postings(ctx, "foo", "bar")
			require.NoError(t, err)

			var builder labels.ScratchBuilder
			require.True(t, postings.Next())
			seriesRef := postings.At()

			var metas []chunks.Meta
			require.NoError(t, ir.Series(seriesRef, &builder, &metas))

			var gotSTs []int64
			for _, m := range metas {
				chk, iterable, err := cr.ChunkOrIterable(m)
				require.NoError(t, err)
				require.Nil(t, iterable)
				it := chk.Iterator(nil)
				for it.Next() != chunkenc.ValNone {
					gotSTs = append(gotSTs, it.AtST())
				}
				require.NoError(t, it.Err())
			}
			msg := "Float samples should have ST stored"
			if tc.isHistogram {
				msg = "Histogram samples should return 0 for ST"
			}
			require.Equal(t, tc.expectedSTs, gotSTs, msg)

			// Also verify via querier.
			q, err := NewBlockQuerier(h, math.MinInt64, math.MaxInt64)
			require.NoError(t, err)
			defer q.Close()

			sset := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.True(t, sset.Next())
			srs := sset.At()
			require.NoError(t, sset.Err())

			sit := srs.Iterator(nil)
			var querySTs []int64
			for sit.Next() != chunkenc.ValNone {
				querySTs = append(querySTs, sit.AtST())
			}
			require.NoError(t, sit.Err())
			require.Equal(t, tc.expectedSTs, querySTs, "Querier should return same ST values as chunk iterator")
		})
	}
}

View File

@ -33,7 +33,7 @@ func TestMemSeries_chunk(t *testing.T) {
appendSamples := func(t *testing.T, s *memSeries, start, end int64, cdm *chunks.ChunkDiskMapper) {
for i := start; i < end; i += chunkStep {
ok, _ := s.append(i, float64(i), 0, chunkOpts{
ok, _ := s.append(0, i, float64(i), 0, chunkOpts{
chunkDiskMapper: cdm,
chunkRange: chunkRange,
samplesPerChunk: DefaultSamplesPerChunk,

File diff suppressed because it is too large Load Diff

View File

@ -169,7 +169,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
return
}
decoded <- series
case record.Samples:
case record.Samples, record.SamplesV2:
samples := h.wlReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(r.Record(), samples)
if err != nil {
@ -646,6 +646,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: h.opts.EnableXOR2Encoding.Load(),
}
for in := range wp.input {
@ -676,7 +677,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
h.numStaleSeries.Dec()
}
if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated {
if _, chunkCreated := ms.append(s.ST, s.T, s.V, 0, appendChunkOpts); chunkCreated {
h.metrics.chunksCreated.Inc()
h.metrics.chunks.Inc()
_ = ms.mmapChunks(h.chunkDiskMapper)
@ -713,14 +714,16 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
newlyStale = newlyStale && !value.IsStaleNaN(ms.lastHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(ms.lastHistogramValue.Sum) && !value.IsStaleNaN(s.h.Sum)
}
_, chunkCreated = ms.appendHistogram(s.t, s.h, 0, appendChunkOpts)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
_, chunkCreated = ms.appendHistogram(0, s.t, s.h, 0, appendChunkOpts)
} else {
newlyStale = value.IsStaleNaN(s.fh.Sum)
if ms.lastFloatHistogramValue != nil {
newlyStale = newlyStale && !value.IsStaleNaN(ms.lastFloatHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(ms.lastFloatHistogramValue.Sum) && !value.IsStaleNaN(s.fh.Sum)
}
_, chunkCreated = ms.appendFloatHistogram(s.t, s.fh, 0, appendChunkOpts)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
_, chunkCreated = ms.appendFloatHistogram(0, s.t, s.fh, 0, appendChunkOpts)
}
if newlyStale {
h.numStaleSeries.Inc()
@ -809,7 +812,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
var err error
rec := r.Record()
switch dec.Type(rec) {
case record.Samples:
case record.Samples, record.SamplesV2:
samples := h.wlReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(rec, samples)
if err != nil {
@ -1090,6 +1093,12 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
var unknownSampleRefs, unknownHistogramRefs uint64
oooCapMax := h.opts.OutOfOrderCapMax.Load()
appendChunkOpts := chunkOpts{
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: h.opts.EnableXOR2Encoding.Load(),
}
// We don't check for minValidTime for ooo samples.
mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
for in := range wp.input {
@ -1109,7 +1118,7 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
missingSeries[s.Ref] = struct{}{}
continue
}
ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger)
ok, chunkCreated, _ := ms.insert(s.ST, s.T, s.V, nil, nil, appendChunkOpts, oooCapMax, h.logger)
if chunkCreated {
h.metrics.chunksCreated.Inc()
h.metrics.chunks.Inc()
@ -1137,9 +1146,11 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
var chunkCreated bool
var ok bool
if s.h != nil {
ok, chunkCreated, _ = ms.insert(s.t, 0, s.h, nil, h.chunkDiskMapper, oooCapMax, h.logger)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
ok, chunkCreated, _ = ms.insert(0, s.t, 0, s.h, nil, appendChunkOpts, oooCapMax, h.logger)
} else {
ok, chunkCreated, _ = ms.insert(s.t, 0, nil, s.fh, h.chunkDiskMapper, oooCapMax, h.logger)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
ok, chunkCreated, _ = ms.insert(0, s.t, 0, nil, s.fh, appendChunkOpts, oooCapMax, h.logger)
}
if chunkCreated {
h.metrics.chunksCreated.Inc()
@ -1253,7 +1264,7 @@ func decodeSeriesFromChunkSnapshot(d *record.Decoder, b []byte) (csr chunkSnapsh
csr.mc.chunk = chk
switch enc {
case chunkenc.EncXOR:
case chunkenc.EncXOR, chunkenc.EncXOR2:
// Backwards-compatibility for old sampleBuf which had last 4 samples.
for range 3 {
_ = dec.Be64int64()
@ -1413,7 +1424,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) {
// Assuming 100 bytes (overestimate) per exemplar, that's ~1MB.
maxExemplarsPerRecord := 10000
batch := make([]record.RefExemplar, 0, maxExemplarsPerRecord)
enc := record.Encoder{}
enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage.Load()}
flushExemplars := func() error {
if len(batch) == 0 {
return nil

View File

@ -34,14 +34,13 @@ func NewOOOChunk() *OOOChunk {
// Insert inserts the sample such that order is maintained.
// Returns false if insert was not possible due to the same timestamp already existing.
func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram) bool {
func (o *OOOChunk) Insert(st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram) bool {
// Although out-of-order samples can be out-of-order amongst themselves, we
// are opinionated and expect them to be usually in-order meaning we could
// try to append at the end first if the new timestamp is higher than the
// last known timestamp.
if len(o.samples) == 0 || t > o.samples[len(o.samples)-1].t {
// TODO(krajorama): pass ST.
o.samples = append(o.samples, sample{0, t, v, h, fh})
o.samples = append(o.samples, sample{st, t, v, h, fh})
return true
}
@ -50,8 +49,7 @@ func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histog
if i >= len(o.samples) {
// none found. append it at the end
// TODO(krajorama): pass ST.
o.samples = append(o.samples, sample{0, t, v, h, fh})
o.samples = append(o.samples, sample{st, t, v, h, fh})
return true
}
@ -63,8 +61,7 @@ func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histog
// Expand length by 1 to make room. use a zero sample, we will overwrite it anyway.
o.samples = append(o.samples, sample{})
copy(o.samples[i+1:], o.samples[i:])
// TODO(krajorama): pass ST.
o.samples[i] = sample{0, t, v, h, fh}
o.samples[i] = sample{st, t, v, h, fh}
return true
}
@ -76,7 +73,7 @@ func (o *OOOChunk) NumSamples() int {
// ToEncodedChunks returns chunks with the samples in the OOOChunk.
//
//nolint:revive
func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error) {
func (o *OOOChunk) ToEncodedChunks(mint, maxt int64, useXOR2 bool) (chks []memChunk, err error) {
if len(o.samples) == 0 {
return nil, nil
}
@ -96,10 +93,13 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
if s.t > maxt {
break
}
encoding := chunkenc.EncXOR
if s.h != nil {
encoding := chunkenc.ValFloat.ChunkEncoding(useXOR2)
switch {
case s.h != nil:
// TODO(krajorama): use ST capable histogram chunk.
encoding = chunkenc.EncHistogram
} else if s.fh != nil {
case s.fh != nil:
// TODO(krajorama): use ST capable float histogram chunk.
encoding = chunkenc.EncFloatHistogram
}
@ -111,15 +111,11 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})
}
cmint = s.t
switch encoding {
case chunkenc.EncXOR:
chunk = chunkenc.NewXORChunk()
case chunkenc.EncHistogram:
chunk = chunkenc.NewHistogramChunk()
case chunkenc.EncFloatHistogram:
chunk = chunkenc.NewFloatHistogramChunk()
default:
chunk = chunkenc.NewXORChunk()
chunk, err = chunkenc.NewEmptyChunk(encoding)
if err != nil {
// This should never happen. No point using a default type as
// calling the wrong append function would panic.
return chks, err
}
app, err = chunk.Appender()
if err != nil {
@ -127,18 +123,17 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
}
}
switch encoding {
case chunkenc.EncXOR:
// TODO(krajorama): pass ST.
app.Append(0, s.t, s.f)
case chunkenc.EncXOR, chunkenc.EncXOR2:
app.Append(s.st, s.t, s.f)
case chunkenc.EncHistogram:
// TODO(krajorama): handle ST capable histogram chunk.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevHApp, _ := prevApp.(*chunkenc.HistogramAppender)
var (
newChunk chunkenc.Chunk
recoded bool
)
// TODO(krajorama): pass ST.
newChunk, recoded, app, _ = app.AppendHistogram(prevHApp, 0, s.t, s.h, false)
newChunk, recoded, app, _ = app.AppendHistogram(prevHApp, s.st, s.t, s.h, false)
if newChunk != nil { // A new chunk was allocated.
if !recoded {
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})
@ -147,14 +142,14 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
chunk = newChunk
}
case chunkenc.EncFloatHistogram:
// TODO(krajorama): handle ST capable float histogram chunk.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevHApp, _ := prevApp.(*chunkenc.FloatHistogramAppender)
var (
newChunk chunkenc.Chunk
recoded bool
)
// TODO(krajorama): pass ST.
newChunk, recoded, app, _ = app.AppendFloatHistogram(prevHApp, 0, s.t, s.fh, false)
newChunk, recoded, app, _ = app.AppendFloatHistogram(prevHApp, s.st, s.t, s.fh, false)
if newChunk != nil { // A new chunk was allocated.
if !recoded {
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})

View File

@ -77,7 +77,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
*chks = (*chks)[:0]
if s.ooo != nil {
return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks)
return getOOOSeriesChunks(s, oh.head.opts.EnableXOR2Encoding.Load(), oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks)
}
*chks = appendSeriesChunks(s, oh.inoMint, oh.maxt, *chks)
return nil
@ -88,7 +88,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
//
// maxMmapRef tells upto what max m-map chunk that we can consider. If it is non-0, then
// the oooHeadChunk will not be considered.
func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error {
func getOOOSeriesChunks(s *memSeries, useXOR2 bool, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error {
tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks))
addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) {
@ -106,7 +106,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
if c.OverlapsClosedInterval(mint, maxt) && maxMmapRef == 0 {
ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.ooo.oooMmappedChunks))))
if len(c.chunk.samples) > 0 { // Empty samples happens in tests, at least.
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(c.minTime, c.maxTime)
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(c.minTime, c.maxTime, useXOR2)
if err != nil {
handleChunkWriteError(err)
return nil
@ -347,7 +347,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead,
}
var lastMmapRef chunks.ChunkDiskMapperRef
mmapRefs := ms.mmapCurrentOOOHeadChunk(head.chunkDiskMapper, head.logger)
mmapRefs := ms.mmapCurrentOOOHeadChunk(chunkOpts{chunkDiskMapper: head.chunkDiskMapper, useXOR2: head.opts.EnableXOR2Encoding.Load()}, head.logger)
if len(mmapRefs) == 0 && len(ms.ooo.oooMmappedChunks) > 0 {
// Nothing was m-mapped. So take the mmapRef from the existing slice if it exists.
mmapRefs = []chunks.ChunkDiskMapperRef{ms.ooo.oooMmappedChunks[len(ms.ooo.oooMmappedChunks)-1].ref}
@ -481,7 +481,7 @@ func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *l
return nil
}
return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks)
return getOOOSeriesChunks(s, ir.ch.head.opts.EnableXOR2Encoding.Load(), ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks)
}
func (*OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, _ string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) {

View File

@ -31,10 +31,11 @@ const testMaxSize int = 32
func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values
func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals.
func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample {
func makeEvenSampleSlice(n int, sampleFunc func(st, ts int64) sample) []sample {
s := make([]sample, n)
for i := range n {
s[i] = sampleFunc(valEven(i))
ts := valEven(i)
s[i] = sampleFunc(ts, ts) // Use ts as st for consistency
}
return s
}
@ -43,23 +44,50 @@ func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample {
// - Number of pre-existing samples anywhere from 0 to testMaxSize-1.
// - Insert new sample before first pre-existing samples, after the last, and anywhere in between.
// - With a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves.
// - With st=0 and st!=0 to verify ordering is based on sample.t, not sample.st.
func TestOOOInsert(t *testing.T) {
scenarios := map[string]struct {
sampleFunc func(ts int64) sample
sampleFunc func(st, ts int64) sample
}{
"float": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, f: float64(ts)}
"float st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, f: float64(ts)}
},
},
"integer histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
"float st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, f: float64(ts)}
},
},
"float histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
"float st=ts-100": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts - 100, t: ts, f: float64(ts)}
},
},
"float st descending while t ascending": {
// st values go in opposite direction of t to ensure ordering is by t.
sampleFunc: func(st, ts int64) sample {
return sample{st: 1000 - ts, t: ts, f: float64(ts)}
},
},
"integer histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"integer histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"float histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
"float histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
}
@ -71,7 +99,7 @@ func TestOOOInsert(t *testing.T) {
}
func testOOOInsert(t *testing.T,
sampleFunc func(ts int64) sample,
sampleFunc func(st, ts int64) sample,
) {
for numPreExisting := 0; numPreExisting <= testMaxSize; numPreExisting++ {
// For example, if we have numPreExisting 2, then:
@ -84,19 +112,22 @@ func testOOOInsert(t *testing.T,
chunk := NewOOOChunk()
chunk.samples = make([]sample, numPreExisting)
chunk.samples = makeEvenSampleSlice(numPreExisting, sampleFunc)
newSample := sampleFunc(valOdd(insertPos))
chunk.Insert(newSample.t, newSample.f, newSample.h, newSample.fh)
ts := valOdd(insertPos)
newSample := sampleFunc(ts, ts) // Use ts as st for consistency
chunk.Insert(newSample.st, newSample.t, newSample.f, newSample.h, newSample.fh)
var expSamples []sample
// Our expected new samples slice, will be first the original samples.
for i := 0; i < insertPos; i++ {
expSamples = append(expSamples, sampleFunc(valEven(i)))
ts := valEven(i)
expSamples = append(expSamples, sampleFunc(ts, ts))
}
// Then the new sample.
expSamples = append(expSamples, newSample)
// Followed by any original samples that were pushed back by the new one.
for i := insertPos; i < numPreExisting; i++ {
expSamples = append(expSamples, sampleFunc(valEven(i)))
ts := valEven(i)
expSamples = append(expSamples, sampleFunc(ts, ts))
}
require.Equal(t, expSamples, chunk.samples, "numPreExisting %d, insertPos %d", numPreExisting, insertPos)
@ -107,23 +138,50 @@ func testOOOInsert(t *testing.T,
// TestOOOInsertDuplicate tests the correct behavior when inserting a sample that is a duplicate of any
// pre-existing samples, with between 1 and testMaxSize pre-existing samples and
// with a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves.
// With st=0 and st!=0 to verify duplicate detection is based on sample.t, not sample.st.
func TestOOOInsertDuplicate(t *testing.T) {
scenarios := map[string]struct {
sampleFunc func(ts int64) sample
sampleFunc func(st, ts int64) sample
}{
"float": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, f: float64(ts)}
"float st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, f: float64(ts)}
},
},
"integer histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
"float st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, f: float64(ts)}
},
},
"float histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
"float st=ts-100": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts - 100, t: ts, f: float64(ts)}
},
},
"float st descending while t ascending": {
// st values go in opposite direction of t to ensure duplicate detection is by t.
sampleFunc: func(st, ts int64) sample {
return sample{st: 1000 - ts, t: ts, f: float64(ts)}
},
},
"integer histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"integer histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"float histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
"float histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
}
@ -135,7 +193,7 @@ func TestOOOInsertDuplicate(t *testing.T) {
}
func testOOOInsertDuplicate(t *testing.T,
sampleFunc func(ts int64) sample,
sampleFunc func(st, ts int64) sample,
) {
for num := 1; num <= testMaxSize; num++ {
for dupPos := 0; dupPos < num; dupPos++ {
@ -145,7 +203,7 @@ func testOOOInsertDuplicate(t *testing.T,
dupSample := chunk.samples[dupPos]
dupSample.f = 0.123
ok := chunk.Insert(dupSample.t, dupSample.f, dupSample.h, dupSample.fh)
ok := chunk.Insert(dupSample.st, dupSample.t, dupSample.f, dupSample.h, dupSample.fh)
expSamples := makeEvenSampleSlice(num, sampleFunc) // We expect no change.
require.False(t, ok)
@ -252,17 +310,17 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) {
for _, s := range tc.samples {
switch s.Type() {
case chunkenc.ValFloat:
oooChunk.Insert(s.t, s.f, nil, nil)
oooChunk.Insert(s.st, s.t, s.f, nil, nil)
case chunkenc.ValHistogram:
oooChunk.Insert(s.t, 0, s.h.Copy(), nil)
oooChunk.Insert(s.st, s.t, 0, s.h.Copy(), nil)
case chunkenc.ValFloatHistogram:
oooChunk.Insert(s.t, 0, nil, s.fh.Copy())
oooChunk.Insert(s.st, s.t, 0, nil, s.fh.Copy())
default:
t.Fatalf("unexpected sample type %d", s.Type())
}
}
chunks, err := oooChunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chunks, err := oooChunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, false)
require.NoError(t, err)
require.Len(t, chunks, len(tc.expectedChunks), "number of chunks")
sampleIndex := 0
@ -308,3 +366,87 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) {
})
}
}
// TestOOOChunks_ToEncodedChunks_WithST tests ToEncodedChunks with useXOR2=true and useXOR2=false for float samples.
// When useXOR2=true, st values are preserved; when useXOR2=false, AtST() returns 0.
// TODO(@krajorama): Add histogram test cases once ST storage is implemented for histograms.
func TestOOOChunks_ToEncodedChunks_WithST(t *testing.T) {
	testCases := map[string]struct {
		samples []sample
	}{
		"floats with st=0": {
			samples: []sample{
				{st: 0, t: 1000, f: 43.0},
				{st: 0, t: 1100, f: 42.0},
			},
		},
		"floats with st=t": {
			samples: []sample{
				{st: 1000, t: 1000, f: 43.0},
				{st: 1100, t: 1100, f: 42.0},
			},
		},
		"floats with st=t-100": {
			samples: []sample{
				{st: 900, t: 1000, f: 43.0},
				{st: 1000, t: 1100, f: 42.0},
			},
		},
		"floats with varying st": {
			samples: []sample{
				{st: 500, t: 1000, f: 43.0},
				{st: 1100, t: 1100, f: 42.0}, // st == t
				{st: 0, t: 1200, f: 41.0},    // st == 0
			},
		},
	}
	storageScenarios := []struct {
		name             string
		useXOR2          bool
		expectedEncoding chunkenc.Encoding
	}{
		{"useXOR2=true", true, chunkenc.EncXOR2},
		{"useXOR2=false", false, chunkenc.EncXOR},
	}
	for name, tc := range testCases {
		for _, sc := range storageScenarios {
			t.Run(name+"/"+sc.name, func(t *testing.T) {
				// Build the OOO chunk from the raw samples.
				var ooo OOOChunk
				for _, s := range tc.samples {
					ooo.Insert(s.st, s.t, s.f, nil, nil)
				}

				encoded, err := ooo.ToEncodedChunks(math.MinInt64, math.MaxInt64, sc.useXOR2)
				require.NoError(t, err)
				require.Len(t, encoded, 1, "number of chunks")

				got := encoded[0]
				require.Equal(t, sc.expectedEncoding, got.chunk.Encoding(), "chunk encoding")
				require.Equal(t, tc.samples[0].t, got.minTime, "chunk minTime")
				require.Equal(t, tc.samples[len(tc.samples)-1].t, got.maxTime, "chunk maxTime")

				// Read every sample back and check that st, t and f round-trip.
				it := got.chunk.Iterator(nil)
				i := 0
				for it.Next() == chunkenc.ValFloat {
					gotT, gotF := it.At()
					if sc.useXOR2 {
						// XOR2 chunks retain the start timestamp.
						require.Equal(t, tc.samples[i].st, it.AtST(), "sample %d st", i)
					} else {
						// Plain XOR chunks have no ST storage.
						require.Equal(t, int64(0), it.AtST(), "sample %d st should be 0 when useXOR2=false", i)
					}
					require.Equal(t, tc.samples[i].t, gotT, "sample %d t", i)
					require.Equal(t, tc.samples[i].f, gotF, "sample %d f", i)
					i++
				}
				require.Equal(t, len(tc.samples), i, "number of samples")
			})
		}
	}
}

View File

@ -866,7 +866,6 @@ func (p *populateWithDelChunkSeriesIterator) Next() bool {
// populateCurrForSingleChunk sets the fields within p.currMetaWithChunk. This
// should be called if the samples in p.currDelIter only form one chunk.
// TODO(krajorama): test ST when chunks support it.
func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
valueType := p.currDelIter.Next()
if valueType == chunkenc.ValNone {
@ -885,60 +884,47 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
st, t int64
err error
)
switch valueType {
case chunkenc.ValHistogram:
newChunk = chunkenc.NewHistogramChunk()
if app, err = newChunk.Appender(); err != nil {
newChunk, err = chunkenc.NewEmptyChunk(p.currMeta.Chunk.Encoding())
if err != nil {
p.err = fmt.Errorf("create new chunk while re-encoding: %w", err)
return false
}
app, err = newChunk.Appender()
if err != nil {
p.err = fmt.Errorf("create appender while re-encoding: %w", err)
return false
}
loop:
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != valueType {
err = fmt.Errorf("found value type %v in chunk with %v", vt, valueType)
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValHistogram {
err = fmt.Errorf("found value type %v in histogram chunk", vt)
break
}
var h *histogram.Histogram
t, h = p.currDelIter.AtHistogram(nil)
st = p.currDelIter.AtST()
_, _, app, err = app.AppendHistogram(nil, st, t, h, true)
if err != nil {
break
}
}
case chunkenc.ValFloat:
newChunk = chunkenc.NewXORChunk()
if app, err = newChunk.Appender(); err != nil {
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValFloat {
err = fmt.Errorf("found value type %v in float chunk", vt)
break
}
st = p.currDelIter.AtST()
switch vt {
case chunkenc.ValFloat:
var v float64
t, v = p.currDelIter.At()
st = p.currDelIter.AtST()
app.Append(st, t, v)
}
case chunkenc.ValFloatHistogram:
newChunk = chunkenc.NewFloatHistogramChunk()
if app, err = newChunk.Appender(); err != nil {
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValFloatHistogram {
err = fmt.Errorf("found value type %v in histogram chunk", vt)
break
case chunkenc.ValHistogram:
var h *histogram.Histogram
t, h = p.currDelIter.AtHistogram(nil)
_, _, app, err = app.AppendHistogram(nil, st, t, h, true)
if err != nil {
break loop
}
case chunkenc.ValFloatHistogram:
var h *histogram.FloatHistogram
t, h = p.currDelIter.AtFloatHistogram(nil)
st = p.currDelIter.AtST()
_, _, app, err = app.AppendFloatHistogram(nil, st, t, h, true)
if err != nil {
break
break loop
}
default:
err = fmt.Errorf("populateCurrForSingleChunk: value type %v unsupported", valueType)
break loop
}
default:
err = fmt.Errorf("populateCurrForSingleChunk: value type %v unsupported", valueType)
}
if err != nil {
@ -958,7 +944,6 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
// populateChunksFromIterable reads the samples from currDelIter to create
// chunks for chunksFromIterable. It also sets p.currMetaWithChunk to the first
// chunk.
// TODO(krajorama): test ST when chunks support it.
func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
p.chunksFromIterable = p.chunksFromIterable[:0]
p.chunksFromIterableIdx = -1
@ -982,30 +967,37 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
app chunkenc.Appender
newChunk chunkenc.Chunk
recoded bool
err error
)
prevValueType := chunkenc.ValNone
hasTS := false
for currentValueType := firstValueType; currentValueType != chunkenc.ValNone; currentValueType = p.currDelIter.Next() {
var (
newChunk chunkenc.Chunk
recoded bool
)
// Check if the encoding has changed (i.e. we need to create a new
// chunk as chunks can't have multiple encoding types).
// For the first sample, the following condition will always be true as
// ValNone != ValFloat | ValHistogram | ValFloatHistogram.
if currentValueType != prevValueType {
// Also if we need to store start time (ST), but the current chunk is
// not capable.
st = p.currDelIter.AtST()
needTS := st != 0
if currentValueType != prevValueType || !hasTS && needTS {
if prevValueType != chunkenc.ValNone {
p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt})
}
cmint = p.currDelIter.AtT()
if currentChunk, err = currentValueType.NewChunk(); err != nil {
if currentChunk, err = currentValueType.NewChunk(needTS); err != nil {
break
}
if app, err = currentChunk.Appender(); err != nil {
break
}
hasTS = needTS
}
switch currentValueType {
@ -1013,14 +1005,12 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
{
var v float64
t, v = p.currDelIter.At()
st = p.currDelIter.AtST()
app.Append(st, t, v)
}
case chunkenc.ValHistogram:
{
var v *histogram.Histogram
t, v = p.currDelIter.AtHistogram(nil)
st = p.currDelIter.AtST()
// No need to set prevApp as AppendHistogram will set the
// counter reset header for the appender that's returned.
newChunk, recoded, app, err = app.AppendHistogram(nil, st, t, v, false)
@ -1029,7 +1019,6 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
{
var v *histogram.FloatHistogram
t, v = p.currDelIter.AtFloatHistogram(nil)
st = p.currDelIter.AtST()
// No need to set prevApp as AppendHistogram will set the
// counter reset header for the appender that's returned.
newChunk, recoded, app, err = app.AppendFloatHistogram(nil, st, t, v, false)

View File

@ -2025,6 +2025,207 @@ func TestPopulateWithDelSeriesIterator_NextWithMinTime(t *testing.T) {
}
}
// TestPopulateWithDelSeriesIterator_WithST tests that ST (start time) values are
// correctly preserved when iterating through chunks with ST support.
func TestPopulateWithDelSeriesIterator_WithST(t *testing.T) {
// Samples with non-zero ST values to test ST preservation.
samplesWithST := [][]chunks.Sample{
{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
},
}
// Samples with varying ST patterns.
samplesVaryingST := [][]chunks.Sample{
{
sample{st: 0, t: 1000, f: 1.0}, // st=0
sample{st: 1500, t: 1500, f: 1.5}, // st=t
sample{st: 1900, t: 2000, f: 2.0}, // st=t-100
sample{st: 500, t: 3000, f: 3.0}, // st < t
},
}
cases := []struct {
name string
samples [][]chunks.Sample
expected []chunks.Sample
}{
{
name: "all samples have non-zero ST",
samples: samplesWithST,
expected: []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
},
},
{
name: "samples with varying ST patterns",
samples: samplesVaryingST,
expected: []chunks.Sample{
sample{st: 0, t: 1000, f: 1.0},
sample{st: 1500, t: 1500, f: 1.5},
sample{st: 1900, t: 2000, f: 2.0},
sample{st: 500, t: 3000, f: 3.0},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// Test with chunks (not iterables).
t.Run("chunks", func(t *testing.T) {
f, chkMetas := createFakeReaderAndNotPopulatedChunks(tc.samples...)
it := &populateWithDelSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, nil)
var result []chunks.Sample
for it.Next() != chunkenc.ValNone {
st := it.AtST()
ts, v := it.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
// Test with iterables.
t.Run("iterables", func(t *testing.T) {
f, chkMetas := createFakeReaderAndIterables(tc.samples...)
it := &populateWithDelSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, nil)
var result []chunks.Sample
for it.Next() != chunkenc.ValNone {
st := it.AtST()
ts, v := it.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
})
}
}
// TestPopulateWithDelChunkSeriesIterator_WithST tests that ST (start time) values are
// correctly preserved when re-encoding chunks with deletions.
func TestPopulateWithDelChunkSeriesIterator_WithST(t *testing.T) {
samplesWithST := []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
}
samplesWithNoLeadingST := []chunks.Sample{
sample{st: 0, t: 1000, f: 1.0},
sample{st: 0, t: 2000, f: 2.0},
sample{st: 0, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
}
cases := []struct {
name string
samples [][]chunks.Sample
intervals tombstones.Intervals
expected []chunks.Sample
}{
{
name: "no deletions - ST preserved",
samples: [][]chunks.Sample{samplesWithST},
intervals: nil,
expected: samplesWithST,
},
{
name: "with deletions - ST preserved in remaining samples",
samples: [][]chunks.Sample{samplesWithST},
// Delete samples at t=2000 and t=4000.
intervals: tombstones.Intervals{{Mint: 2000, Maxt: 2000}, {Mint: 4000, Maxt: 4000}},
expected: []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
{
name: "delete first sample - ST preserved",
samples: [][]chunks.Sample{samplesWithST},
// Delete first sample.
intervals: tombstones.Intervals{{Mint: 1000, Maxt: 1000}},
expected: []chunks.Sample{
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
{
// This tests that populateCurrForSingleChunk can handle
// chunks that don't start with ST, but introduce ST later.
name: "delete first sample - ST late preserved",
samples: [][]chunks.Sample{samplesWithNoLeadingST},
// Delete first sample.
intervals: tombstones.Intervals{{Mint: 1000, Maxt: 1000}},
expected: []chunks.Sample{
sample{st: 0, t: 2000, f: 2.0},
sample{st: 0, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// Test with chunks that need re-encoding due to deletions.
t.Run("chunks", func(t *testing.T) {
f, chkMetas := createFakeReaderAndNotPopulatedChunks(tc.samples...)
it := &populateWithDelChunkSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, tc.intervals)
var result []chunks.Sample
for it.Next() {
meta := it.At()
chkIt := meta.Chunk.Iterator(nil)
for chkIt.Next() != chunkenc.ValNone {
st := chkIt.AtST()
ts, v := chkIt.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, chkIt.Err())
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
// Test with iterables.
t.Run("iterables", func(t *testing.T) {
f, chkMetas := createFakeReaderAndIterables(tc.samples...)
it := &populateWithDelChunkSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, tc.intervals)
var result []chunks.Sample
for it.Next() {
meta := it.At()
chkIt := meta.Chunk.Iterator(nil)
for chkIt.Next() != chunkenc.ValNone {
st := chkIt.AtST()
ts, v := chkIt.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, chkIt.Err())
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
})
}
}
// Test the cost of merging series sets for different number of merged sets and their size.
// The subset are all equivalent so this does not capture merging of partial or non-overlapping sets well.
// TODO(bwplotka): Merge with storage merged series set benchmark.

207
tsdb/record/bench_test.go Normal file
View File

@ -0,0 +1,207 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package record_test
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb/compression"
"github.com/prometheus/prometheus/tsdb/record"
"github.com/prometheus/prometheus/util/testrecord"
)
// zeroOutSTs returns a copy of samples with every start timestamp (ST) reset
// to zero; the input slice is left untouched.
func zeroOutSTs(samples []record.RefSample) []record.RefSample {
	out := make([]record.RefSample, len(samples))
	copy(out, samples)
	for i := range out {
		out[i].ST = 0
	}
	return out
}
// TestEncodeDecode round-trips generated sample records through the encoder
// and decoder, for both the V1 (ST storage off) and V2 (ST storage on)
// record formats and for all generated data distributions.
func TestEncodeDecode(t *testing.T) {
	for _, enableSTStorage := range []bool{false, true} {
		for _, tcase := range []testrecord.RefSamplesCase{
			testrecord.Realistic1000Samples,
			testrecord.Realistic1000WithVariableSTSamples,
			testrecord.Realistic1000WithConstSTSamples,
			testrecord.WorstCase1000,
			testrecord.WorstCase1000WithSTSamples,
		} {
			var (
				dec record.Decoder
				buf []byte
				enc = record.Encoder{EnableSTStorage: enableSTStorage}
			)
			s := testrecord.GenTestRefSamplesCase(t, tcase)

			// When ST storage is off the record format drops the ST field,
			// so decoding yields zeroed STs. Compute the expectation once
			// instead of repeating it per sub-case.
			expected := s
			if !enableSTStorage {
				expected = zeroOutSTs(s)
			}

			// Plain round trip.
			{
				got, err := dec.Samples(enc.Samples(s, nil), nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With byte buffer (append!)
			{
				buf = make([]byte, 10, 1e5)
				got, err := dec.Samples(enc.Samples(s, buf)[10:], nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With sample slice
			{
				samples := make([]record.RefSample, 0, len(s)+1)
				got, err := dec.Samples(enc.Samples(s, nil), samples)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With compression.
			{
				buf := enc.Samples(s, nil)
				cEnc, err := compression.NewEncoder()
				require.NoError(t, err)
				buf, _, err = cEnc.Encode(compression.Zstd, buf, nil)
				require.NoError(t, err)
				buf, err = compression.NewDecoder().Decode(compression.Zstd, buf, nil)
				require.NoError(t, err)
				got, err := dec.Samples(buf, nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}
		}
	}
}
var (
	// compressions are the WAL compression codecs exercised by the benchmarks below.
	compressions = []compression.Type{compression.None, compression.Snappy, compression.Zstd}
	// dataCases are the generated sample distributions used as benchmark inputs.
	dataCases = []testrecord.RefSamplesCase{
		testrecord.Realistic1000Samples,
		testrecord.Realistic1000WithVariableSTSamples,
		testrecord.Realistic1000WithConstSTSamples,
		testrecord.WorstCase1000,
		testrecord.WorstCase1000WithSTSamples,
	}
	// UseV2 selects the SamplesV2 (ST-capable) encoding for the benchmark encoders.
	UseV2 = true
)
/*
export bench=encode-v2 && go test ./tsdb/record/... \
-run '^$' -bench '^BenchmarkEncode_Samples' \
-benchtime 5s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
*/
// BenchmarkEncode_Samples measures sample-record encoding plus compression
// for every codec/data-case combination, reporting the raw and compressed
// record sizes as custom metrics.
func BenchmarkEncode_Samples(b *testing.B) {
	for _, compr := range compressions {
		for _, data := range dataCases {
			b.Run(fmt.Sprintf("compr=%v/data=%v", compr, data), func(b *testing.B) {
				var (
					samples = testrecord.GenTestRefSamplesCase(b, data)
					enc     = record.Encoder{EnableSTStorage: UseV2}
					buf     []byte
					cBuf    []byte
				)
				cEnc, err := compression.NewEncoder()
				require.NoError(b, err)

				// Warm up.
				buf = enc.Samples(samples, buf[:0])
				cBuf, _, err = cEnc.Encode(compr, buf, cBuf[:0])
				require.NoError(b, err)

				b.ReportAllocs()
				b.ResetTimer()
				for b.Loop() {
					buf = enc.Samples(samples, buf[:0])
					cBuf, _, _ = cEnc.Encode(compr, buf, cBuf[:0])
				}
				// The record sizes are invariant across iterations; report
				// them once after the timed loop so the metric bookkeeping
				// does not pollute the measurement.
				b.ReportMetric(float64(len(buf)), "B/rec")
				b.ReportMetric(float64(len(cBuf)), "B/compressed-rec")
			})
		}
	}
}
/*
export bench=decode-v2 && go test ./tsdb/record/... \
-run '^$' -bench '^BenchmarkDecode_Samples' \
-benchtime 5s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
*/
// BenchmarkDecode_Samples measures decompression plus sample-record decoding
// for every codec/data-case combination.
func BenchmarkDecode_Samples(b *testing.B) {
	for _, codec := range compressions {
		for _, tcase := range dataCases {
			b.Run(fmt.Sprintf("compr=%v/data=%v", codec, tcase), func(b *testing.B) {
				var (
					in         = testrecord.GenTestRefSamplesCase(b, tcase)
					enc        = record.Encoder{EnableSTStorage: UseV2}
					dec        record.Decoder
					cDec       = compression.NewDecoder()
					cBuf       []byte
					samplesBuf []record.RefSample
				)
				// Prepare the compressed input once, outside the timed loop.
				rec := enc.Samples(in, nil)
				cEnc, err := compression.NewEncoder()
				require.NoError(b, err)
				rec, _, err = cEnc.Encode(codec, rec, nil)
				require.NoError(b, err)

				// Warm up the reusable buffers and verify the round trip works.
				cBuf, err = cDec.Decode(codec, rec, cBuf[:0])
				require.NoError(b, err)
				samplesBuf, err = dec.Samples(cBuf, samplesBuf[:0])
				require.NoError(b, err)

				b.ReportAllocs()
				b.ResetTimer()
				for b.Loop() {
					// Errors were checked during warm-up; ignore them here to
					// keep the hot loop free of extra branches.
					cBuf, _ = cDec.Decode(codec, rec, cBuf[:0])
					samplesBuf, _ = dec.Samples(cBuf, samplesBuf[:0])
				}
			})
		}
	}
}

View File

@ -58,6 +58,8 @@ const (
CustomBucketsHistogramSamples Type = 9
// CustomBucketsFloatHistogramSamples is used to match WAL records of type Float Histogram with custom buckets.
CustomBucketsFloatHistogramSamples Type = 10
// SamplesV2 is an enhanced sample record with an encoding scheme that allows storing float samples with timestamp and an optional ST per sample.
SamplesV2 Type = 11
)
func (rt Type) String() string {
@ -66,6 +68,8 @@ func (rt Type) String() string {
return "series"
case Samples:
return "samples"
case SamplesV2:
return "samples-v2"
case Tombstones:
return "tombstones"
case Exemplars:
@ -157,12 +161,12 @@ type RefSeries struct {
Labels labels.Labels
}
// RefSample is a timestamp/value pair associated with a reference to a series.
// RefSample is a timestamp/st/value struct associated with a reference to a series.
// TODO(beorn7): Perhaps make this "polymorphic", including histogram and float-histogram pointers? Then get rid of RefHistogramSample.
type RefSample struct {
Ref chunks.HeadSeriesRef
T int64
V float64
Ref chunks.HeadSeriesRef
ST, T int64
V float64
}
// RefMetadata is the metadata associated with a series ID.
@ -182,6 +186,7 @@ type RefExemplar struct {
}
// RefHistogramSample is a histogram.
// TODO(owilliams): Add support for ST.
type RefHistogramSample struct {
Ref chunks.HeadSeriesRef
T int64
@ -189,6 +194,7 @@ type RefHistogramSample struct {
}
// RefFloatHistogramSample is a float histogram.
// TODO(owilliams): Add support for ST.
type RefFloatHistogramSample struct {
Ref chunks.HeadSeriesRef
T int64
@ -220,7 +226,7 @@ func (*Decoder) Type(rec []byte) Type {
return Unknown
}
switch t := Type(rec[0]); t {
case Series, Samples, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples:
case Series, Samples, SamplesV2, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples:
return t
}
return Unknown
@ -311,12 +317,20 @@ func (d *Decoder) DecodeLabels(dec *encoding.Decbuf) labels.Labels {
}
// Samples appends samples in rec to the given slice.
func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
dec := encoding.Decbuf{B: rec}
if Type(dec.Byte()) != Samples {
return nil, errors.New("invalid record type")
switch typ := dec.Byte(); Type(typ) {
case Samples:
return d.samplesV1(&dec, samples)
case SamplesV2:
return d.samplesV2(&dec, samples)
default:
return nil, fmt.Errorf("invalid record type %v, expected Samples(2) or SamplesV2(11)", typ)
}
}
// samplesV1 appends samples in rec to the given slice, while ignoring ST information.
func (*Decoder) samplesV1(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) {
if dec.Len() == 0 {
return samples, nil
}
@ -349,6 +363,60 @@ func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
return samples, nil
}
// samplesV2 appends samples in rec to the given slice using the V2 algorithm,
// which is more efficient and supports ST (See Encoder.samplesV2 definition).
//
// Record layout (the record type byte has already been consumed):
//   first sample:      varint(ref), varint(t), varint(st), be64(value)
//   following samples: varint(ref delta to prev), varint(t delta to first t),
//                      marker byte (noST|sameST|explicitST),
//                      varint(st delta to first st) only for explicitST,
//                      be64(value)
func (*Decoder) samplesV2(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) {
	if dec.Len() == 0 {
		return samples, nil
	}
	// Allow 1 byte for each varint and 8 for the value; the output slice must
	// be at least that big. Grow without discarding any samples the caller
	// already collected, since this function appends.
	if minSize := len(samples) + dec.Len()/(1+1+8); cap(samples) < minSize {
		grown := make([]RefSample, len(samples), minSize)
		copy(grown, samples)
		samples = grown
	}

	var (
		firstT, firstST int64
		// first tracks whether the next sample is the first of THIS record.
		// Using len(samples) == 0 instead would mis-decode when the caller
		// passes a non-empty slice to append to.
		first = true
	)
	for len(dec.B) > 0 && dec.Err() == nil {
		var ref, t, st int64
		if first {
			// The first sample carries absolute ref/t/st values that act as
			// the delta bases for the rest of the record.
			ref = dec.Varint64()
			firstT = dec.Varint64()
			t = firstT
			firstST = dec.Varint64()
			st = firstST
			first = false
		} else {
			prev := samples[len(samples)-1]
			ref = int64(prev.Ref) + dec.Varint64()
			t = firstT + dec.Varint64()
			switch marker := dec.Byte(); marker {
			case noST:
				// st stays 0.
			case sameST:
				st = prev.ST
			case explicitST:
				st = firstST + dec.Varint64()
			default:
				return nil, fmt.Errorf("unknown start timestamp marker %d after %d samples", marker, len(samples))
			}
		}
		val := dec.Be64()

		samples = append(samples, RefSample{
			Ref: chunks.HeadSeriesRef(ref),
			ST:  st,
			T:   t,
			V:   math.Float64frombits(val),
		})
	}

	if dec.Err() != nil {
		return nil, fmt.Errorf("decode error after %d samples: %w", len(samples), dec.Err())
	}
	if len(dec.B) > 0 {
		return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B))
	}
	return samples, nil
}
// Tombstones appends tombstones in rec to the given slice.
func (*Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) {
dec := encoding.Decbuf{B: rec}
@ -656,7 +724,11 @@ func DecodeFloatHistogram(buf *encoding.Decbuf, fh *histogram.FloatHistogram) {
// Encoder encodes series, sample, and tombstones records.
// The zero value is ready to use.
type Encoder struct{}
type Encoder struct {
// EnableSTStorage enables the SamplesV2 encoding, which is more efficient
// than V1 and supports start time per sample.
EnableSTStorage bool
}
// Series appends the encoded series to b and returns the resulting slice.
func (*Encoder) Series(series []RefSeries, b []byte) []byte {
@ -702,7 +774,16 @@ func EncodeLabels(buf *encoding.Encbuf, lbls labels.Labels) {
}
// Samples appends the encoded samples to b and returns the resulting slice.
func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
// Depending on the ST existence it either writes Samples or SamplesWithST record.
func (e *Encoder) Samples(samples []RefSample, b []byte) []byte {
if e.EnableSTStorage {
return e.samplesV2(samples, b)
}
return e.samplesV1(samples, b)
}
// Samples appends the encoded samples to b and returns the resulting slice.
func (*Encoder) samplesV1(samples []RefSample, b []byte) []byte {
buf := encoding.Encbuf{B: b}
buf.PutByte(byte(Samples))
@ -725,6 +806,56 @@ func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
return buf.Get()
}
// Marker bytes describing how a sample's start timestamp (ST) is encoded for
// every sample after the first one in a SamplesV2 record.
const (
	// Start timestamp marker values for indicating trivial cases.
	noST byte = iota // Sample has no start time (ST == 0).
	sameST // Sample's start time equals the previous sample's start time.
	explicitST // Explicit start timestamp value, stored as a delta to the record's first start time.
)
// samplesV2 appends the encoded samples to b and returns the resulting slice
// using a more efficient per-sample delta encoding and allows for ST
// storage.
func (*Encoder) samplesV2(samples []RefSample, b []byte) []byte {
	buf := encoding.Encbuf{B: b}
	buf.PutByte(byte(SamplesV2))

	if len(samples) == 0 {
		return buf.Get()
	}

	// The first sample is stored with absolute values; they serve as the
	// delta bases for the rest of the record.
	first := samples[0]
	buf.PutVarint64(int64(first.Ref))
	buf.PutVarint64(first.T)
	buf.PutVarint64(first.ST)
	buf.PutBE64(math.Float64bits(first.V))

	// Each following sample stores its ref as a delta to the previous
	// sample's ref, its timestamp as a delta to the first timestamp, and a
	// marker byte for the start timestamp so the trivial cases (no ST, same
	// ST as before) need no varint at all.
	for i, s := range samples[1:] {
		prev := samples[i] // samples[1:][i-1] == samples[i].
		buf.PutVarint64(int64(s.Ref) - int64(prev.Ref))
		buf.PutVarint64(s.T - first.T)
		switch {
		case s.ST == 0:
			buf.PutByte(noST)
		case s.ST == prev.ST:
			buf.PutByte(sameST)
		default:
			buf.PutByte(explicitST)
			buf.PutVarint64(s.ST - first.ST)
		}
		buf.PutBE64(math.Float64bits(s.V))
	}
	return buf.Get()
}
// Tombstones appends the encoded tombstones to b and returns the resulting slice.
func (*Encoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte {
buf := encoding.Encbuf{B: b}

View File

@ -76,15 +76,63 @@ func TestRecord_EncodeDecode(t *testing.T) {
require.NoError(t, err)
require.Equal(t, metadata, decMetadata)
// Without ST.
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
{Ref: 123, T: -1231, V: -123},
{Ref: 2, T: 0, V: 99999},
}
decSamples, err := dec.Samples(enc.Samples(samples, nil), nil)
encoded := enc.Samples(samples, nil)
require.Equal(t, Samples, dec.Type(encoded))
decSamples, err := dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samples, decSamples)
enc = Encoder{EnableSTStorage: true}
// Without ST again, but with V1 encoder that enables SamplesV2.
samples = []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
{Ref: 123, T: -1231, V: -123},
{Ref: 2, T: 0, V: 99999},
}
encoded = enc.Samples(samples, nil)
require.Equal(t, SamplesV2, dec.Type(encoded))
decSamples, err = dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samples, decSamples)
// With ST.
samplesWithST := []RefSample{
{Ref: 0, T: 12423423, ST: 14, V: 1.2345},
{Ref: 123, T: -1231, ST: 14, V: -123},
{Ref: 2, T: 0, ST: 14, V: 99999},
}
encoded = enc.Samples(samplesWithST, nil)
require.Equal(t, SamplesV2, dec.Type(encoded))
decSamples, err = dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samplesWithST, decSamples)
// With ST (ST[i] == T[i-1]).
samplesWithSTDelta := []RefSample{
{Ref: 0, T: 12423400, ST: 12423300, V: 1.2345},
{Ref: 123, T: 12423500, ST: 12423400, V: -123},
{Ref: 2, T: 12423600, ST: 12423500, V: 99999},
}
decSamples, err = dec.Samples(enc.Samples(samplesWithSTDelta, nil), nil)
require.NoError(t, err)
require.Equal(t, samplesWithSTDelta, decSamples)
// With ST (ST[i] == ST[i-1]).
samplesWithConstST := []RefSample{
{Ref: 0, T: 12423400, ST: 12423300, V: 1.2345},
{Ref: 123, T: 12423500, ST: 12423300, V: -123},
{Ref: 2, T: 12423600, ST: 12423300, V: 99999},
}
decSamples, err = dec.Samples(enc.Samples(samplesWithConstST, nil), nil)
require.NoError(t, err)
require.Equal(t, samplesWithConstST, decSamples)
// Intervals get split up into single entries. So we don't get back exactly
// what we put in.
tstones := []tombstones.Stone{
@ -227,252 +275,262 @@ func TestRecord_EncodeDecode(t *testing.T) {
}
func TestRecord_DecodeInvalidHistogramSchema(t *testing.T) {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
}
}
func TestRecord_DecodeInvalidFloatHistogramSchema(t *testing.T) {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
}
}
func TestRecord_DecodeTooHighResolutionHistogramSchema(t *testing.T) {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema)
})
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema)
})
}
}
}
func TestRecord_DecodeTooHighResolutionFloatHistogramSchema(t *testing.T) {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema)
})
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema)
})
}
}
}
// TestRecord_Corrupted ensures that corrupted records return the correct error.
// Bugfix check for pull/521 and pull/523.
func TestRecord_Corrupted(t *testing.T) {
var enc Encoder
dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
for _, enableSTStorage := range []bool{false, true} {
enc := Encoder{EnableSTStorage: enableSTStorage}
dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
t.Run("Test corrupted series record", func(t *testing.T) {
series := []RefSeries{
{
Ref: 100,
Labels: labels.FromStrings("abc", "def", "123", "456"),
},
}
corrupted := enc.Series(series, nil)[:8]
_, err := dec.Series(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted sample record", func(t *testing.T) {
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
}
corrupted := enc.Samples(samples, nil)[:8]
_, err := dec.Samples(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted tombstone record", func(t *testing.T) {
tstones := []tombstones.Stone{
{Ref: 123, Intervals: tombstones.Intervals{
{Mint: -1000, Maxt: 1231231},
{Mint: 5000, Maxt: 0},
}},
}
corrupted := enc.Tombstones(tstones, nil)[:8]
_, err := dec.Tombstones(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted exemplar record", func(t *testing.T) {
exemplars := []RefExemplar{
{Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")},
}
corrupted := enc.Exemplars(exemplars, nil)[:8]
_, err := dec.Exemplars(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted metadata record", func(t *testing.T) {
meta := []RefMetadata{
{Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"},
}
corrupted := enc.Metadata(meta, nil)[:8]
_, err := dec.Metadata(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted histogram record", func(t *testing.T) {
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: 1,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
t.Run("Test corrupted series record", func(t *testing.T) {
series := []RefSeries{
{
Ref: 100,
Labels: labels.FromStrings("abc", "def", "123", "456"),
},
},
{
Ref: 67,
T: 5678,
H: &histogram.Histogram{
Count: 8,
ZeroThreshold: 0.001,
Sum: 35.5,
Schema: -53,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 2, Length: 2},
},
PositiveBuckets: []int64{2, -1, 2, 0},
CustomValues: []float64{0, 2, 4, 6, 8},
},
},
}
}
corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil)
corruptedHists = corruptedHists[:8]
corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil)
corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8]
_, err := dec.HistogramSamples(corruptedHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
_, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
corrupted := enc.Series(series, nil)[:8]
_, err := dec.Series(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted sample record", func(t *testing.T) {
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
}
corrupted := enc.Samples(samples, nil)[:8]
_, err := dec.Samples(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted tombstone record", func(t *testing.T) {
tstones := []tombstones.Stone{
{Ref: 123, Intervals: tombstones.Intervals{
{Mint: -1000, Maxt: 1231231},
{Mint: 5000, Maxt: 0},
}},
}
corrupted := enc.Tombstones(tstones, nil)[:8]
_, err := dec.Tombstones(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted exemplar record", func(t *testing.T) {
exemplars := []RefExemplar{
{Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")},
}
corrupted := enc.Exemplars(exemplars, nil)[:8]
_, err := dec.Exemplars(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted metadata record", func(t *testing.T) {
meta := []RefMetadata{
{Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"},
}
corrupted := enc.Metadata(meta, nil)[:8]
_, err := dec.Metadata(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted histogram record", func(t *testing.T) {
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: 1,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
{
Ref: 67,
T: 5678,
H: &histogram.Histogram{
Count: 8,
ZeroThreshold: 0.001,
Sum: 35.5,
Schema: -53,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 2, Length: 2},
},
PositiveBuckets: []int64{2, -1, 2, 0},
CustomValues: []float64{0, 2, 4, 6, 8},
},
},
}
corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil)
corruptedHists = corruptedHists[:8]
corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil)
corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8]
_, err := dec.HistogramSamples(corruptedHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
_, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
}
}
func TestRecord_Type(t *testing.T) {
@ -487,6 +545,16 @@ func TestRecord_Type(t *testing.T) {
recordType = dec.Type(enc.Samples(samples, nil))
require.Equal(t, Samples, recordType)
// With EnableSTStorage set, all Samples are V2.
enc = Encoder{EnableSTStorage: true}
samples = []RefSample{{Ref: 123, T: 12345, V: 1.2345}}
recordType = dec.Type(enc.Samples(samples, nil))
require.Equal(t, SamplesV2, recordType)
samplesST := []RefSample{{Ref: 123, ST: 1, T: 12345, V: 1.2345}}
recordType = dec.Type(enc.Samples(samplesST, nil))
require.Equal(t, SamplesV2, recordType)
tstones := []tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{{Mint: 1, Maxt: 2}}}}
recordType = dec.Type(enc.Tombstones(tstones, nil))
require.Equal(t, Tombstones, recordType)
@ -716,24 +784,26 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) {
make: initNHCBRefs,
},
} {
for _, labelCount := range []int{0, 10, 50} {
for _, histograms := range []int{10, 100, 1000} {
for _, buckets := range []int{0, 1, 10, 100} {
b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) {
series, samples, nhcbs := maker.make(labelCount, histograms, buckets)
enc := Encoder{}
for b.Loop() {
var buf []byte
enc.Series(series, buf)
enc.Samples(samples, buf)
var leftOver []RefHistogramSample
_, leftOver = enc.HistogramSamples(nhcbs, buf)
if len(leftOver) > 0 {
enc.CustomBucketsHistogramSamples(leftOver, buf)
for _, enableSTStorage := range []bool{false, true} {
for _, labelCount := range []int{0, 10, 50} {
for _, histograms := range []int{10, 100, 1000} {
for _, buckets := range []int{0, 1, 10, 100} {
b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) {
series, samples, nhcbs := maker.make(labelCount, histograms, buckets)
enc := Encoder{EnableSTStorage: enableSTStorage}
for b.Loop() {
var buf []byte
enc.Series(series, buf)
enc.Samples(samples, buf)
var leftOver []RefHistogramSample
_, leftOver = enc.HistogramSamples(nhcbs, buf)
if len(leftOver) > 0 {
enc.CustomBucketsHistogramSamples(leftOver, buf)
}
b.ReportMetric(float64(len(buf)), "recordBytes/ops")
}
b.ReportMetric(float64(len(buf)), "recordBytes/ops")
}
})
})
}
}
}
}

View File

@ -102,7 +102,7 @@ func DeleteTempCheckpoints(logger *slog.Logger, dir string) error {
// segmented format as the original WAL itself.
// This makes it easy to read it through the WAL package and concatenate
// it with the original WAL.
func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) {
func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64, enableSTStorage bool) (*CheckpointStats, error) {
stats := &CheckpointStats{}
var sgmReader io.ReadCloser
@ -166,7 +166,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He
metadata []record.RefMetadata
st = labels.NewSymbolTable() // Needed for decoding; labels do not outlive this function.
dec = record.NewDecoder(st, logger)
enc record.Encoder
enc = record.Encoder{EnableSTStorage: enableSTStorage}
buf []byte
recs [][]byte
@ -200,7 +200,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He
stats.TotalSeries += len(series)
stats.DroppedSeries += len(series) - len(repl)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err = dec.Samples(rec, samples)
if err != nil {
return nil, fmt.Errorf("decode samples: %w", err)

View File

@ -171,251 +171,257 @@ func TestCheckpoint(t *testing.T) {
}
}
for _, compress := range compression.Types() {
t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) {
dir := t.TempDir()
for _, enableSTStorage := range []bool{false, true} {
for _, compress := range compression.Types() {
t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
// Create a dummy segment to bump the initial number.
seg, err := CreateSegment(dir, 100)
require.NoError(t, err)
require.NoError(t, seg.Close())
// Manually create checkpoint for 99 and earlier.
w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress)
require.NoError(t, err)
// Add some data we expect to be around later.
err = w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")},
}, nil))
require.NoError(t, err)
// Log an unknown record, that might have come from a future Prometheus version.
require.NoError(t, w.Log([]byte{255}))
require.NoError(t, w.Close())
// Start a WAL and write records to it as usual.
w, err = NewSize(nil, nil, dir, 128*1024, compress)
require.NoError(t, err)
samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0
var last int64
for i := 0; ; i++ {
_, n, err := Segments(w.Dir())
enc := record.Encoder{EnableSTStorage: enableSTStorage}
// Create a dummy segment to bump the initial number.
seg, err := CreateSegment(dir, 100)
require.NoError(t, err)
if n >= 106 {
break
}
// Write some series initially.
if i == 0 {
b := enc.Series([]record.RefSeries{
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
{Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")},
require.NoError(t, seg.Close())
// Manually create checkpoint for 99 and earlier.
w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress)
require.NoError(t, err)
// Add some data we expect to be around later.
err = w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")},
}, nil))
require.NoError(t, err)
// Log an unknown record, that might have come from a future Prometheus version.
require.NoError(t, w.Log([]byte{255}))
require.NoError(t, w.Close())
// Start a WAL and write records to it as usual.
w, err = NewSize(nil, nil, dir, 128*1024, compress)
require.NoError(t, err)
samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0
var last int64
for i := 0; ; i++ {
_, n, err := Segments(w.Dir())
require.NoError(t, err)
if n >= 106 {
break
}
// Write some series initially.
if i == 0 {
b := enc.Series([]record.RefSeries{
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
{Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")},
}, nil)
require.NoError(t, w.Log(b))
b = enc.Metadata([]record.RefMetadata{
{Ref: 2, Unit: "unit", Help: "help"},
{Ref: 3, Unit: "unit", Help: "help"},
{Ref: 4, Unit: "unit", Help: "help"},
{Ref: 5, Unit: "unit", Help: "help"},
}, nil)
require.NoError(t, w.Log(b))
}
// Write samples until the WAL has enough segments.
// Make them have drifting timestamps within a record to see that they
// get filtered properly.
b := enc.Samples([]record.RefSample{
{Ref: 0, T: last, V: float64(i)},
{Ref: 1, T: last + 10000, V: float64(i)},
{Ref: 2, T: last + 20000, V: float64(i)},
{Ref: 3, T: last + 30000, V: float64(i)},
}, nil)
require.NoError(t, w.Log(b))
samplesInWAL += 4
h := makeHistogram(i)
b, _ = enc.HistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: h},
{Ref: 1, T: last + 10000, H: h},
{Ref: 2, T: last + 20000, H: h},
{Ref: 3, T: last + 30000, H: h},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
cbh := makeCustomBucketHistogram(i)
b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: cbh},
{Ref: 1, T: last + 10000, H: cbh},
{Ref: 2, T: last + 20000, H: cbh},
{Ref: 3, T: last + 30000, H: cbh},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
fh := makeFloatHistogram(i)
b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: fh},
{Ref: 1, T: last + 10000, FH: fh},
{Ref: 2, T: last + 20000, FH: fh},
{Ref: 3, T: last + 30000, FH: fh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
cbfh := makeCustomBucketFloatHistogram(i)
b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: cbfh},
{Ref: 1, T: last + 10000, FH: cbfh},
{Ref: 2, T: last + 20000, FH: cbfh},
{Ref: 3, T: last + 30000, FH: cbfh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
b = enc.Exemplars([]record.RefExemplar{
{Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))},
}, nil)
require.NoError(t, w.Log(b))
// Write changing metadata for each series. In the end, only the latest
// version should end up in the checkpoint.
b = enc.Metadata([]record.RefMetadata{
{Ref: 2, Unit: "unit", Help: "help"},
{Ref: 3, Unit: "unit", Help: "help"},
{Ref: 4, Unit: "unit", Help: "help"},
{Ref: 5, Unit: "unit", Help: "help"},
{Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
}, nil)
require.NoError(t, w.Log(b))
last += 100
}
// Write samples until the WAL has enough segments.
// Make them have drifting timestamps within a record to see that they
// get filtered properly.
b := enc.Samples([]record.RefSample{
{Ref: 0, T: last, V: float64(i)},
{Ref: 1, T: last + 10000, V: float64(i)},
{Ref: 2, T: last + 20000, V: float64(i)},
{Ref: 3, T: last + 30000, V: float64(i)},
}, nil)
require.NoError(t, w.Log(b))
samplesInWAL += 4
h := makeHistogram(i)
b, _ = enc.HistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: h},
{Ref: 1, T: last + 10000, H: h},
{Ref: 2, T: last + 20000, H: h},
{Ref: 3, T: last + 30000, H: h},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
cbh := makeCustomBucketHistogram(i)
b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: cbh},
{Ref: 1, T: last + 10000, H: cbh},
{Ref: 2, T: last + 20000, H: cbh},
{Ref: 3, T: last + 30000, H: cbh},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
fh := makeFloatHistogram(i)
b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: fh},
{Ref: 1, T: last + 10000, FH: fh},
{Ref: 2, T: last + 20000, FH: fh},
{Ref: 3, T: last + 30000, FH: fh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
cbfh := makeCustomBucketFloatHistogram(i)
b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: cbfh},
{Ref: 1, T: last + 10000, FH: cbfh},
{Ref: 2, T: last + 20000, FH: cbfh},
{Ref: 3, T: last + 30000, FH: cbfh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
require.NoError(t, w.Close())
b = enc.Exemplars([]record.RefExemplar{
{Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))},
}, nil)
require.NoError(t, w.Log(b))
stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool {
return x%2 == 0
}, last/2, enableSTStorage)
require.NoError(t, err)
require.NoError(t, w.Truncate(107))
require.NoError(t, DeleteCheckpoints(w.Dir(), 106))
require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples)
require.Positive(t, stats.DroppedSamples)
// Write changing metadata for each series. In the end, only the latest
// version should end up in the checkpoint.
b = enc.Metadata([]record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
}, nil)
require.NoError(t, w.Log(b))
// Only the new checkpoint should be left.
files, err := os.ReadDir(dir)
require.NoError(t, err)
require.Len(t, files, 1)
require.Equal(t, "checkpoint.00000106", files[0].Name())
last += 100
}
require.NoError(t, w.Close())
sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106"))
require.NoError(t, err)
defer sr.Close()
stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool {
return x%2 == 0
}, last/2)
require.NoError(t, err)
require.NoError(t, w.Truncate(107))
require.NoError(t, DeleteCheckpoints(w.Dir(), 106))
require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples)
require.Positive(t, stats.DroppedSamples)
dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
var series []record.RefSeries
var metadata []record.RefMetadata
r := NewReader(sr)
// Only the new checkpoint should be left.
files, err := os.ReadDir(dir)
require.NoError(t, err)
require.Len(t, files, 1)
require.Equal(t, "checkpoint.00000106", files[0].Name())
samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0
for r.Next() {
rec := r.Record()
sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106"))
require.NoError(t, err)
defer sr.Close()
dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
var series []record.RefSeries
var metadata []record.RefMetadata
r := NewReader(sr)
samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
series, err = dec.Series(rec, series)
require.NoError(t, err)
case record.Samples:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
for _, s := range samples {
require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp")
switch dec.Type(rec) {
case record.Series:
series, err = dec.Series(rec, series)
require.NoError(t, err)
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
for _, s := range samples {
require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp")
}
samplesInCheckpoint += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
histograms, err := dec.HistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range histograms {
require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp")
}
histogramsInCheckpoint += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
floatHistograms, err := dec.FloatHistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range floatHistograms {
require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp")
}
floatHistogramsInCheckpoint += len(floatHistograms)
case record.Exemplars:
exemplars, err := dec.Exemplars(rec, nil)
require.NoError(t, err)
for _, e := range exemplars {
require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp")
}
case record.Metadata:
metadata, err = dec.Metadata(rec, metadata)
require.NoError(t, err)
}
samplesInCheckpoint += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
histograms, err := dec.HistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range histograms {
require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp")
}
histogramsInCheckpoint += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
floatHistograms, err := dec.FloatHistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range floatHistograms {
require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp")
}
floatHistogramsInCheckpoint += len(floatHistograms)
case record.Exemplars:
exemplars, err := dec.Exemplars(rec, nil)
require.NoError(t, err)
for _, e := range exemplars {
require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp")
}
case record.Metadata:
metadata, err = dec.Metadata(rec, metadata)
require.NoError(t, err)
}
}
require.NoError(t, r.Err())
// Making sure we replayed some samples. We expect >50% samples to be still present.
require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5)
require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8)
require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5)
require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8)
require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5)
require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8)
require.NoError(t, r.Err())
// Making sure we replayed some samples. We expect >50% samples to be still present.
require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5)
require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8)
require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5)
require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8)
require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5)
require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8)
expectedRefSeries := []record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
}
testutil.RequireEqual(t, expectedRefSeries, series)
expectedRefSeries := []record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
}
testutil.RequireEqual(t, expectedRefSeries, series)
expectedRefMetadata := []record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 4, Unit: "unit", Help: "help"},
}
sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref })
require.Equal(t, expectedRefMetadata, metadata)
})
expectedRefMetadata := []record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 4, Unit: "unit", Help: "help"},
}
sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref })
require.Equal(t, expectedRefMetadata, metadata)
})
}
}
}
func TestCheckpointNoTmpFolderAfterError(t *testing.T) {
// Create a new wlog with invalid data.
dir := t.TempDir()
w, err := NewSize(nil, nil, dir, 64*1024, compression.None)
require.NoError(t, err)
var enc record.Encoder
require.NoError(t, w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")},
}, nil)))
require.NoError(t, w.Close())
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
// Create a new wlog with invalid data.
dir := t.TempDir()
w, err := NewSize(nil, nil, dir, 64*1024, compression.None)
require.NoError(t, err)
enc := record.Encoder{EnableSTStorage: enableSTStorage}
require.NoError(t, w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")},
}, nil)))
require.NoError(t, w.Close())
// Corrupt data.
f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666)
require.NoError(t, err)
_, err = f.WriteAt([]byte{42}, 1)
require.NoError(t, err)
require.NoError(t, f.Close())
// Corrupt data.
f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666)
require.NoError(t, err)
_, err = f.WriteAt([]byte{42}, 1)
require.NoError(t, err)
require.NoError(t, f.Close())
// Run the checkpoint and since the wlog contains corrupt data this should return an error.
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0)
require.Error(t, err)
// Run the checkpoint and since the wlog contains corrupt data this should return an error.
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0, enableSTStorage)
require.Error(t, err)
// Walk the wlog dir to make sure there are no tmp folder left behind after the error.
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("access err %q: %w", path, err)
}
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name())
}
return nil
})
require.NoError(t, err)
// Walk the wlog dir to make sure there are no tmp folder left behind after the error.
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("access err %q: %w", path, err)
}
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name())
}
return nil
})
require.NoError(t, err)
})
}
}
func TestCheckpointDeletesTemporaryCheckpoints(t *testing.T) {
@ -428,7 +434,7 @@ func TestCheckpointDeletesTemporaryCheckpoints(t *testing.T) {
require.NoError(t, err)
defer w.Close()
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1000, func(_ chunks.HeadSeriesRef) bool { return true }, 1000)
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1000, func(_ chunks.HeadSeriesRef) bool { return true }, 1000, false)
require.NoError(t, err)
files, err := os.ReadDir(dir)

View File

@ -543,7 +543,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
}
w.writer.StoreSeries(series, segmentNum)
case record.Samples:
case record.Samples, record.SamplesV2:
// If we're not tailing a segment we can ignore any samples records we see.
// This speeds up replay of the WAL by > 10x.
if !tail {

File diff suppressed because it is too large Load Diff

View File

@ -16,10 +16,21 @@ package kahansum
import "math"
// Inc performs addition of two floating-point numbers using the Kahan summation algorithm.
// We get incorrect results if this function is inlined; see https://github.com/prometheus/prometheus/issues/16714.
//
//go:noinline
func Inc(inc, sum, c float64) (newSum, newC float64) {
// We've seen Kahan summation return less accurate results when Inc function is
// allowed to be inlined (see https://github.com/prometheus/prometheus/pull/16895).
// Go permits fusing float operations (e.g. using fused multiply-add, which allows
// calculating a*b+c without rounding the result of a*b to precision available in float64),
// and Kahan sum is sensitive to float rounding behavior. Instead of forbidding inlining
// (which only disallows fusing operations outside of Inc with operations happening inside)
// and eating the performance cost of non-inlined function calls, we forbid just the fusing
// across Inc call boundary. We can do that by explicitly requesting Inc arguments and results
// to be rounded to float64 precision, as documented in go spec (https://go.dev/ref/spec#Floating_point_operators).
// The following casts are not no-ops!
inc = float64(inc)
sum = float64(sum)
c = float64(c)
t := sum + inc
switch {
case math.IsInf(t, 0):
@ -31,6 +42,9 @@ func Inc(inc, sum, c float64) (newSum, newC float64) {
default:
c += (inc - t) + sum
}
t = float64(t)
c = float64(c)
return t, c
}

96
util/testrecord/record.go Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testrecord
import (
"math"
"testing"
"github.com/prometheus/prometheus/tsdb/chunks"
"github.com/prometheus/prometheus/tsdb/record"
)
// RefSamplesCase identifies a predefined shape of generated RefSample data
// used by tests and benchmarks (e.g. for WAL record encoding).
type RefSamplesCase string

const (
	// Realistic1000Samples is 1000 samples across series with identical timestamps and no start timestamps.
	Realistic1000Samples RefSamplesCase = "real1000"
	// Realistic1000WithVariableSTSamples is 1000 samples with start timestamps varying across series.
	Realistic1000WithVariableSTSamples RefSamplesCase = "real1000-vst"
	// Realistic1000WithConstSTSamples is 1000 samples sharing one start timestamp.
	Realistic1000WithConstSTSamples RefSamplesCase = "real1000-cst"
	// WorstCase1000 is 1000 samples with timestamps alternating between extremes, defeating delta encoding.
	WorstCase1000 RefSamplesCase = "worst1000"
	// WorstCase1000WithSTSamples is WorstCase1000 plus similarly adversarial start timestamps.
	WorstCase1000WithSTSamples RefSamplesCase = "worst1000-st"
)
// GenTestRefSamplesCase returns 1000 record.RefSample entries shaped
// according to the requested case c. It fails the test for an unknown case.
func GenTestRefSamplesCase(t testing.TB, c RefSamplesCase) []record.RefSample {
	t.Helper()

	const n = 1e3
	samples := make([]record.RefSample, n)
	switch c {
	case Realistic1000Samples:
		// Samples are across series, so likely all have the same timestamp.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				T:   12423423,
				V:   highVarianceFloat(idx),
			}
		}
	case Realistic1000WithConstSTSamples:
		// Likely the start times will all be the same with deltas.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  12423423,
				T:   12423423 + 15,
				V:   highVarianceFloat(idx),
			}
		}
	case Realistic1000WithVariableSTSamples:
		// Maybe series have different start times though.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  int64((12423423 / 9) * (idx % 10)),
				T:   12423423,
				V:   highVarianceFloat(idx),
			}
		}
	case WorstCase1000:
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				T:   highVarianceInt(idx),
				V:   highVarianceFloat(idx),
			}
		}
	case WorstCase1000WithSTSamples:
		// Worst case is when the values are significantly different
		// to each other which breaks delta encoding.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  highVarianceInt(idx+1) / 1024, // Make sure ST is not comparable to T.
				T:   highVarianceInt(idx),
				V:   highVarianceFloat(idx),
			}
		}
	default:
		t.Fatal("unknown case", c)
	}
	return samples
}
func highVarianceInt(i int) int64 {
if i%2 == 0 {
return math.MinInt32
}
return math.MaxInt32
}
func highVarianceFloat(i int) float64 {
if i%2 == 0 {
return math.SmallestNonzeroFloat32
}
return math.MaxFloat32
}

View File

@ -48,6 +48,8 @@ type RecordsCase struct {
// HistogramFn source histogram for histogram and float histogram records.
// By default, newTestHist is used (exponential bucketing)
HistogramFn func(ref int) *histogram.Histogram
// NoST controls if ref samples should skip generating Start Timestamps. If true, ST is 0.
NoST bool
}
// Records represents batches of generated WAL records.
@ -118,10 +120,18 @@ func GenerateRecords(c RecordsCase) (ret Records) {
Help: fmt.Sprintf("help text for %d", ref),
}
for j := range c.SamplesPerSeries {
ts := c.TsFn(ref, j)
// Keep ST simple for now; we don't test the exact semantics.
// We can improve later (e.g. STsFN).
sts := ts - 1
if c.NoST {
sts = 0
}
ret.Samples[i*c.SamplesPerSeries+j] = record.RefSample{
Ref: chunks.HeadSeriesRef(ref),
T: c.TsFn(ref, j),
V: float64(ref),
ST: sts, T: ts,
V: float64(ref),
}
}
h := c.HistogramFn(ref)

View File

@ -1257,7 +1257,7 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_avg(v instant-vector)</code> returns the arithmetic average of observed values stored in each
histogram sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
native histogram sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
@ -1283,13 +1283,13 @@ const funcDocs: Record<string, React.ReactNode> = {
histogram_count: (
<>
<p>
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each histogram sample
in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each native histogram
sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each histogram
sample.
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each native
histogram sample.
</p>
<p>
@ -1574,15 +1574,15 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_stddev(v instant-vector)</code> returns the estimated standard deviation of observations for
each histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to have
the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries, the
arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
each native histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to
have the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries,
the arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_stdvar(v instant-vector)</code> returns the estimated standard variance of
observations for each histogram sample in <code>v</code>.
observations for each native histogram sample in <code>v</code>.
</p>
</>
),
@ -1590,28 +1590,28 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_stddev(v instant-vector)</code> returns the estimated standard deviation of observations for
each histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to have
the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries, the
arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
each native histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to
have the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries,
the arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_stdvar(v instant-vector)</code> returns the estimated standard variance of
observations for each histogram sample in <code>v</code>.
observations for each native histogram sample in <code>v</code>.
</p>
</>
),
histogram_sum: (
<>
<p>
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each histogram sample
in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each native histogram
sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each histogram
sample.
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each native
histogram sample.
</p>
<p>

View File

@ -317,10 +317,16 @@ export const functionIdentifierTerms = [
info: 'Join together label values into new label',
type: 'function',
},
{
label: 'first_over_time',
detail: 'function',
info: 'Return the value of the oldest sample in the specified interval',
type: 'function',
},
{
label: 'last_over_time',
detail: 'function',
info: 'The most recent point value in specified interval.',
info: 'Return the value of the most recent sample in the specified interval',
type: 'function',
},
{
@ -371,6 +377,12 @@ export const functionIdentifierTerms = [
info: 'Return the timestamp of the minimum value over time for input series',
type: 'function',
},
{
label: 'ts_of_first_over_time',
detail: 'function',
info: 'Return the timestamp of the first value over time for input series',
type: 'function',
},
{
label: 'ts_of_last_over_time',
detail: 'function',

View File

@ -253,6 +253,11 @@ func (h *Handler) ApplyConfig(conf *config.Config) error {
defer h.mtx.Unlock()
h.config = conf
if conf.StorageConfig.TSDBConfig != nil && conf.StorageConfig.TSDBConfig.Retention != nil {
h.options.TSDBRetentionDuration = conf.StorageConfig.TSDBConfig.Retention.Time
h.options.TSDBMaxBytes = conf.StorageConfig.TSDBConfig.Retention.Size
h.options.TSDBMaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
}
return nil
}
@ -866,20 +871,25 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) {
status.Hostname = hostname
status.ServerTime = time.Now().UTC()
if h.options.TSDBRetentionDuration != 0 {
status.StorageRetention = h.options.TSDBRetentionDuration.String()
h.mtx.RLock()
tsdbRetentionDuration := h.options.TSDBRetentionDuration
tsdbMaxBytes := h.options.TSDBMaxBytes
tsdbMaxPercentage := h.options.TSDBMaxPercentage
h.mtx.RUnlock()
if tsdbRetentionDuration != 0 {
status.StorageRetention = tsdbRetentionDuration.String()
}
if h.options.TSDBMaxBytes != 0 {
if tsdbMaxBytes != 0 {
if status.StorageRetention != "" {
status.StorageRetention += " or "
}
status.StorageRetention += h.options.TSDBMaxBytes.String()
status.StorageRetention += tsdbMaxBytes.String()
}
if h.options.TSDBMaxPercentage != 0 {
if tsdbMaxPercentage != 0 {
if status.StorageRetention != "" {
status.StorageRetention += " or "
}
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%"
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(tsdbMaxPercentage), 10) + "%"
}
metrics, err := h.gatherer.Gather()