Merge branch 'refs/heads/main' into skip-wait-for-discovery

This commit is contained in:
avilevy 2026-03-20 23:21:59 +00:00
commit 3018f35527
No known key found for this signature in database
80 changed files with 7983 additions and 3349 deletions

View File

@ -142,9 +142,7 @@ jobs:
enable_npm: false
# NOTE: Those tests are based on https://github.com/prometheus/compliance and
# are executed against the ./cmd/prometheus main package.
- run: go test -skip ${SKIP_TESTS} -v --tags=compliance ./compliance/...
env:
SKIP_TESTS: "TestRemoteWriteSender/prometheus/samples/rw2/start_timestamp*" # TODO(bwplotka): PROM-60
- run: go test -v --tags=compliance ./compliance/...
build:
name: Build Prometheus for common architectures

View File

@ -20,7 +20,8 @@ Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/
| v3.8 | 2025-11-06 | Jan Fajerski (GitHub: @jan--f) |
| v3.9 | 2025-12-18 | Bryan Boreham (GitHub: @bboreham) |
| v3.10 | 2026-02-05 | Ganesh Vernekar (GitHub: @codesome) |
| v3.11 | 2026-03-19 | **volunteer welcome** |
| v3.11 | 2026-03-25 | Julien Pivotto (GitHub: @roidelapluie) |
| v3.12 | 2026-05-06 | **volunteer welcome** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

View File

@ -271,6 +271,7 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
case "created-timestamp-zero-ingestion":
// NOTE(bwplotka): Once AppendableV1 is removed, there will be only the TSDB and agent flags.
c.scrape.EnableStartTimestampZeroIngestion = true
c.scrape.ParseST = true
c.web.STZeroIngestionEnabled = true
c.tsdb.EnableSTAsZeroSample = true
c.agent.EnableSTAsZeroSample = true
@ -279,16 +280,19 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
// This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental start timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental start timestamp zero ingestion enabled. OpenMetrics 1.0 parsing will parse <metric>_created metrics as ST instead of normal sample. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "xor2-encoding":
c.tsdb.EnableXOR2Encoding = true
logger.Info("Experimental XOR2 chunk encoding enabled.")
case "st-storage":
// TODO(bwplotka): Implement ST Storage as per PROM-60 and document this hidden feature flag.
c.scrape.ParseST = true
c.tsdb.EnableSTStorage = true
c.agent.EnableSTStorage = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. This is to widen the ST support surface.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
logger.Info("Experimental start timestamp storage enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental start timestamp storage enabled. OpenMetrics 1.0 parsing will parse <metric>_created metrics as ST instead of normal sample. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
logger.Info("Experimental delayed compaction is enabled.")
@ -601,7 +605,7 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, st-storage, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers, xor2-encoding. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
@ -671,6 +675,18 @@ func main() {
os.Exit(2)
}
// Set TSDB retention defaults from CLI flags before any config file is loaded.
// This makes CLI flags act as the default when no retention section is present.
cliRetentionDuration := cfg.tsdb.RetentionDuration
cliMaxBytes := cfg.tsdb.MaxBytes
if cliRetentionDuration == 0 && cliMaxBytes == 0 {
cliRetentionDuration = defaultRetentionDuration
}
config.DefaultTSDBRetentionConfig = config.TSDBRetentionConfig{
Time: cliRetentionDuration,
Size: cliMaxBytes,
}
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil {
@ -712,21 +728,11 @@ func main() {
logger.Warn("The option --storage.tsdb.block-reload-interval is set to a value less than 1s. Setting it to 1s to avoid overload.")
cfg.tsdb.BlockReloadInterval = model.Duration(1 * time.Second)
}
if cfgFile.StorageConfig.TSDBConfig != nil {
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
if cfgFile.StorageConfig.TSDBConfig.Retention != nil {
if cfgFile.StorageConfig.TSDBConfig.Retention.Time > 0 {
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
}
if cfgFile.StorageConfig.TSDBConfig.Retention.Size > 0 {
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
}
if cfgFile.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
}
}
}
cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow
cfg.tsdb.StaleSeriesCompactionThreshold = cfgFile.StorageConfig.TSDBConfig.StaleSeriesCompactionThreshold
cfg.tsdb.RetentionDuration = cfgFile.StorageConfig.TSDBConfig.Retention.Time
cfg.tsdb.MaxBytes = cfgFile.StorageConfig.TSDBConfig.Retention.Size
cfg.tsdb.MaxPercentage = cfgFile.StorageConfig.TSDBConfig.Retention.Percentage
// Set Go runtime parameters before we get too far into initialization.
updateGoGC(cfgFile, logger)
@ -778,11 +784,6 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 && cfg.tsdb.MaxPercentage == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
logger.Info("No time, size or percentage retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
if cfg.tsdb.RetentionDuration < 0 {
y, err := model.ParseDuration("100y")
@ -1031,8 +1032,29 @@ func main() {
reloaders := []reloader{
{
name: "db_storage",
reloader: localStorage.ApplyConfig,
name: "db_storage",
reloader: func() func(*config.Config) error {
lastTSDBRetention := config.TSDBRetentionConfig{}
return func(cfg *config.Config) error {
err := localStorage.ApplyConfig(cfg)
if err != nil || agentMode || cfg.StorageConfig.TSDBConfig == nil || cfg.StorageConfig.TSDBConfig.Retention == nil {
return err
}
curr := cfg.StorageConfig.TSDBConfig.Retention
if *curr == lastTSDBRetention {
return nil
}
logger.Info("TSDB retention updated",
"duration", curr.Time,
"size", curr.Size,
"percentage", curr.Percentage,
)
lastTSDBRetention = *curr
return nil
}
}(),
}, {
name: "remote_storage",
reloader: remoteStorage.ApplyConfig,
@ -2009,6 +2031,7 @@ type tsdbOptions struct {
BlockReloadInterval model.Duration
EnableSTAsZeroSample bool
EnableSTStorage bool
EnableXOR2Encoding bool
StaleSeriesCompactionThreshold float64
}
@ -2039,6 +2062,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
FeatureRegistry: features.DefaultRegistry,
EnableSTAsZeroSample: opts.EnableSTAsZeroSample,
EnableSTStorage: opts.EnableSTStorage,
EnableXOR2Encoding: opts.EnableXOR2Encoding,
StaleSeriesCompactionThreshold: opts.StaleSeriesCompactionThreshold,
}
}

View File

@ -251,6 +251,8 @@
"exemplar_storage": false,
"isolation": true,
"native_histograms": true,
"st_storage": false,
"xor2_encoding": false,
"use_uncached_io": false
},
"ui": {

View File

@ -2,7 +2,7 @@ module compliance
go 1.25.0
require github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275
require github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e
require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect

View File

@ -30,8 +30,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275 h1:NLTtFqM00EuqtisYX9P+hQkjoxNxsR2oUQWDluyD2Xw=
github.com/prometheus/compliance/remotewrite v0.0.0-20260220101514-bccaa3a70275/go.mod h1:VEPZGvpSBbzTKc5acnBj9ng4gfo1DZ4qBsCQnoNFiSc=
github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e h1:tT/KBv0aSFq4AElo/bSVvUd+yNKj72hkRsyiKU45nIQ=
github.com/prometheus/compliance/remotewrite v0.0.0-20260223092825-818283e1171e/go.mod h1:VEPZGvpSBbzTKc5acnBj9ng4gfo1DZ4qBsCQnoNFiSc=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f h1:ERPCnBglv9Z4IjkEBTNbcHmZPlryMldXVWLkk7TeBIY=
github.com/prometheus/prometheus v0.307.4-0.20251119130332-1174b0ce4f1f/go.mod h1:7hcXiGf9AXIKW2ehWWzxkvRYJTGmc2StUIJ8mprfxjg=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=

View File

@ -53,7 +53,9 @@ scrape_configs:
var scrapeConfigTmpl = template.Must(template.New("config").Parse(scrapeConfigTemplate))
type internalPrometheus struct{}
type internalPrometheus struct {
agentMode bool
}
// Name returns the identifier under which this sender is reported in the
// remote-write compliance test output.
func (p internalPrometheus) Name() string {
	return "internal-prometheus"
}
@ -74,20 +76,33 @@ func (p internalPrometheus) Run(ctx context.Context, opts sender.Options) error
}
defer os.RemoveAll(dir)
return sender.RunCommand(ctx, "../cmd/prometheus", nil,
"go", "run", ".",
args := []string{
"run", ".",
"--web.listen-address=0.0.0.0:0",
fmt.Sprintf("--storage.tsdb.path=%v", dir),
fmt.Sprintf("--config.file=%s", configFile),
// Set important flags for the full remote write compliance:
"--enable-feature=st-storage",
)
}
if p.agentMode {
args = append(args, fmt.Sprintf("--storage.agent.path=%v", dir), "--agent")
} else {
args = append(args, fmt.Sprintf("--storage.tsdb.path=%v", dir))
}
return sender.RunCommand(ctx, "../cmd/prometheus", nil, "go", args...)
}
var _ sender.Sender = internalPrometheus{}
// TestRemoteWriteSender runs remote write sender compliance tests defined in
// https://github.com/prometheus/compliance/tree/main/remotewrite/sender
// https://github.com/prometheus/compliance/tree/main/remotewrite/sender against
// both agent and server modes.
func TestRemoteWriteSender(t *testing.T) {
sender.RunTests(t, internalPrometheus{}, sender.ComplianceTests())
t.Run("mode=server", func(t *testing.T) {
t.Parallel()
sender.RunTests(t, internalPrometheus{}, sender.ComplianceTests())
})
t.Run("mode=agent", func(t *testing.T) {
t.Parallel()
sender.RunTests(t, internalPrometheus{agentMode: true}, sender.ComplianceTests())
})
}

View File

@ -83,6 +83,13 @@ func Load(s string, logger *slog.Logger) (*Config, error) {
return nil, err
}
// When the config body is empty, UnmarshalYAML is never called, so
// TSDBConfig may still be nil.
if cfg.StorageConfig.TSDBConfig == nil {
retention := DefaultTSDBRetentionConfig
cfg.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
}
b := labels.NewScratchBuilder(0)
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
newV := os.Expand(v.Value, func(s string) string {
@ -276,6 +283,9 @@ var (
// For backwards compatibility.
LabelNamePreserveMultipleUnderscores: true,
}
// DefaultTSDBRetentionConfig is the default TSDB retention configuration.
DefaultTSDBRetentionConfig TSDBRetentionConfig
)
// Config is the top-level configuration for Prometheus's config files.
@ -405,6 +415,13 @@ func (c *Config) UnmarshalYAML(unmarshal func(any) error) error {
c.Runtime = DefaultRuntimeConfig
}
// If no storage.tsdb section is present, TSDBConfig is nil and its
// UnmarshalYAML never runs. Inject the default retention here.
if c.StorageConfig.TSDBConfig == nil {
retention := DefaultTSDBRetentionConfig
c.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
}
for _, rf := range c.RuleFiles {
if !patRulePath.MatchString(rf) {
return fmt.Errorf("invalid rule file path %q", rf)
@ -1097,6 +1114,22 @@ type TSDBRetentionConfig struct {
Percentage uint `yaml:"percentage,omitempty"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
// It resets the receiver, decodes the YAML body, and then validates that the
// retention size and percentage fall within their allowed ranges.
func (t *TSDBRetentionConfig) UnmarshalYAML(unmarshal func(any) error) error {
	// Start from the zero value so fields from a previous decode never leak in.
	*t = TSDBRetentionConfig{}
	// The alias type drops this custom unmarshaler, avoiding infinite recursion.
	type plain TSDBRetentionConfig
	err := unmarshal((*plain)(t))
	if err != nil {
		return err
	}
	switch {
	case t.Size < 0:
		return fmt.Errorf("'storage.tsdb.retention.size' must be greater than or equal to 0, got %v", t.Size)
	case t.Percentage > 100:
		return fmt.Errorf("'storage.tsdb.retention.percentage' must be in the range [0, 100], got %v", t.Percentage)
	}
	return nil
}
// TSDBConfig configures runtime reloadable configuration options.
type TSDBConfig struct {
// OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted
@ -1127,6 +1160,11 @@ func (t *TSDBConfig) UnmarshalYAML(unmarshal func(any) error) error {
t.OutOfOrderTimeWindow = time.Duration(t.OutOfOrderTimeWindowFlag).Milliseconds()
if t.Retention == nil {
retention := DefaultTSDBRetentionConfig
t.Retention = &retention
}
return nil
}

View File

@ -20,9 +20,10 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
OTLPConfig: DefaultOTLPConfig,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
OTLPConfig: DefaultOTLPConfig,
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
RuleFiles: []string{
"testdata/first.rules",
"testdata/rules/second.rules",

View File

@ -2626,6 +2626,22 @@ var expectedErrors = []struct {
filename: "stackit_endpoint.bad.yml",
errMsg: "invalid endpoint",
},
{
filename: "tsdb_retention_time.bad.yml",
errMsg: `not a valid duration string: "-1h"`,
},
{
filename: "tsdb_retention_size.bad.yml",
errMsg: `'storage.tsdb.retention.size' must be greater than or equal to 0`,
},
{
filename: "tsdb_retention_percentage.bad.yml",
errMsg: `'storage.tsdb.retention.percentage' must be in the range [0, 100]`,
},
{
filename: "tsdb_retention_percentage_negative.bad.yml",
errMsg: "cannot unmarshal !!int `-1` into uint",
},
}
func TestBadConfigs(t *testing.T) {
@ -2649,6 +2665,8 @@ func TestEmptyConfig(t *testing.T) {
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
retention := DefaultTSDBRetentionConfig
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
require.Equal(t, exp, *c)
require.Equal(t, 75, c.Runtime.GoGC)
}
@ -2700,6 +2718,10 @@ func TestGlobalConfig(t *testing.T) {
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
// TSDBConfig is always injected by Config.UnmarshalYAML even when no
// storage.tsdb section is present, so the expected config must include it.
retention := DefaultTSDBRetentionConfig
exp.StorageConfig.TSDBConfig = &TSDBConfig{Retention: &retention}
require.Equal(t, exp, *c)
})

View File

@ -18,8 +18,9 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
StorageConfig: StorageConfig{TSDBConfig: &TSDBConfig{Retention: &TSDBRetentionConfig{}}},
RuleFiles: []string{
"testdata\\first.rules",
"testdata\\rules\\second.rules",

View File

@ -0,0 +1,4 @@
# Bad config: retention percentage above the allowed [0, 100] range.
storage:
  tsdb:
    retention:
      percentage: 101

View File

@ -0,0 +1,4 @@
# Bad config: negative retention percentage cannot be decoded into a uint.
storage:
  tsdb:
    retention:
      percentage: -1

View File

@ -0,0 +1,4 @@
# Bad config: retention size must be greater than or equal to 0.
storage:
  tsdb:
    retention:
      size: -1GB

View File

@ -0,0 +1,4 @@
# Bad config: negative retention time is not a valid duration string.
storage:
  tsdb:
    retention:
      time: -1h

View File

@ -298,7 +298,10 @@ func newCredential(cfg SDConfig, policyClientOptions policy.ClientOptions) (azco
}
credential = azcore.TokenCredential(workloadIdentityCredential)
case authMethodManagedIdentity:
options := &azidentity.ManagedIdentityCredentialOptions{ClientOptions: policyClientOptions, ID: azidentity.ClientID(cfg.ClientID)}
options := &azidentity.ManagedIdentityCredentialOptions{ClientOptions: policyClientOptions}
if cfg.ClientID != "" {
options.ID = azidentity.ClientID(cfg.ClientID)
}
managedIdentityCredential, err := azidentity.NewManagedIdentityCredential(options)
if err != nil {
return nil, err

View File

@ -24,6 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake"
@ -490,6 +491,27 @@ func TestNewAzureResourceFromID(t *testing.T) {
}
}
func TestNewCredentialManagedIdentity(t *testing.T) {
	// An empty ClientID (system-assigned identity) must yield a valid
	// credential. Previously the empty string was wrapped as
	// azidentity.ClientID(""), which is not nil and caused the Azure SDK to
	// look up a non-existent user-assigned identity instead of falling back
	// to the system-assigned one. A non-empty ClientID (user-assigned
	// identity) must keep working as well.
	for _, clientID := range []string{"", "00000000-0000-0000-0000-000000000000"} {
		conf := SDConfig{
			AuthenticationMethod: authMethodManagedIdentity,
			ClientID:             clientID,
		}
		credential, err := newCredential(conf, policy.ClientOptions{})
		require.NoError(t, err)
		require.NotNil(t, credential)
	}
}
func TestAzureRefresh(t *testing.T) {
tests := []struct {
scenario string

View File

@ -38,8 +38,10 @@ const (
hetznerLabelHcloudImageOSVersion = hetznerHcloudLabelPrefix + "image_os_version"
hetznerLabelHcloudImageOSFlavor = hetznerHcloudLabelPrefix + "image_os_flavor"
hetznerLabelHcloudPrivateIPv4 = hetznerHcloudLabelPrefix + "private_ipv4_"
hetznerLabelHcloudDatacenterLocation = hetznerHcloudLabelPrefix + "datacenter_location"
hetznerLabelHcloudDatacenterLocationNetworkZone = hetznerHcloudLabelPrefix + "datacenter_location_network_zone"
hetznerLabelHcloudLocation = hetznerHcloudLabelPrefix + "location"
hetznerLabelHcloudLocationNetworkZone = hetznerHcloudLabelPrefix + "location_network_zone"
hetznerLabelHcloudDatacenterLocation = hetznerHcloudLabelPrefix + "datacenter_location" // Label name kept for backward compatibility
hetznerLabelHcloudDatacenterLocationNetworkZone = hetznerHcloudLabelPrefix + "datacenter_location_network_zone" // Label name kept for backward compatibility
hetznerLabelHcloudCPUCores = hetznerHcloudLabelPrefix + "cpu_cores"
hetznerLabelHcloudCPUType = hetznerHcloudLabelPrefix + "cpu_type"
hetznerLabelHcloudMemoryGB = hetznerHcloudLabelPrefix + "memory_size_gb"
@ -98,13 +100,14 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
hetznerLabelRole: model.LabelValue(HetznerRoleHcloud),
hetznerLabelServerID: model.LabelValue(strconv.FormatInt(server.ID, 10)),
hetznerLabelServerName: model.LabelValue(server.Name),
hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelPublicIPv4: model.LabelValue(server.PublicNet.IPv4.IP.String()),
hetznerLabelPublicIPv6Network: model.LabelValue(server.PublicNet.IPv6.Network.String()),
hetznerLabelServerStatus: model.LabelValue(server.Status),
hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Datacenter.Location.Name), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Datacenter.Location.NetworkZone), //nolint:staticcheck // server.Datacenter is deprecated but kept for backwards compatibility until the next minor release
hetznerLabelHcloudLocation: model.LabelValue(server.Location.Name),
hetznerLabelHcloudLocationNetworkZone: model.LabelValue(server.Location.NetworkZone),
hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Location.Name), // Label name kept for backward compatibility
hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Location.NetworkZone), // Label name kept for backward compatibility
hetznerLabelHcloudType: model.LabelValue(server.ServerType.Name),
hetznerLabelHcloudCPUCores: model.LabelValue(strconv.Itoa(server.ServerType.Cores)),
hetznerLabelHcloudCPUType: model.LabelValue(server.ServerType.CPUType),
@ -114,6 +117,12 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
model.AddressLabel: model.LabelValue(net.JoinHostPort(server.PublicNet.IPv4.IP.String(), strconv.FormatUint(uint64(d.port), 10))),
}
// [hcloud.Server.Datacenter] is deprecated and will be removed after 1 July 2026.
// See https://docs.hetzner.cloud/changelog#2025-12-16-phasing-out-datacenters
if server.Datacenter != nil { //nolint:staticcheck
labels[hetznerLabelDatacenter] = model.LabelValue(server.Datacenter.Name) //nolint:staticcheck
}
if server.Image != nil {
labels[hetznerLabelHcloudImageName] = model.LabelValue(server.Image.Name)
labels[hetznerLabelHcloudImageDescription] = model.LabelValue(server.Image.Description)

View File

@ -69,6 +69,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_hcloud_image_description": model.LabelValue("Ubuntu 20.04 Standard 64 bit"),
"__meta_hetzner_hcloud_image_os_flavor": model.LabelValue("ubuntu"),
"__meta_hetzner_hcloud_image_os_version": model.LabelValue("20.04"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("1"),
@ -93,6 +95,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_hcloud_image_description": model.LabelValue("Ubuntu 20.04 Standard 64 bit"),
"__meta_hetzner_hcloud_image_os_flavor": model.LabelValue("ubuntu"),
"__meta_hetzner_hcloud_image_os_version": model.LabelValue("20.04"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("2"),
@ -114,6 +118,8 @@ func TestHCloudSDRefresh(t *testing.T) {
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc14"),
"__meta_hetzner_public_ipv4": model.LabelValue("1.2.3.6"),
"__meta_hetzner_public_ipv6_network": model.LabelValue("2001:db7::/64"),
"__meta_hetzner_hcloud_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_datacenter_location": model.LabelValue("fsn1"),
"__meta_hetzner_hcloud_datacenter_location_network_zone": model.LabelValue("eu-central"),
"__meta_hetzner_hcloud_cpu_cores": model.LabelValue("2"),

View File

@ -36,7 +36,7 @@ const (
hetznerLabelServerID = hetznerLabelPrefix + "server_id"
hetznerLabelServerName = hetznerLabelPrefix + "server_name"
hetznerLabelServerStatus = hetznerLabelPrefix + "server_status"
hetznerLabelDatacenter = hetznerLabelPrefix + "datacenter"
hetznerLabelDatacenter = hetznerLabelPrefix + "datacenter" // Label name kept for backward compatibility
hetznerLabelPublicIPv4 = hetznerLabelPrefix + "public_ipv4"
hetznerLabelPublicIPv6Network = hetznerLabelPrefix + "public_ipv6_network"
)

View File

@ -124,6 +124,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 1,
"name": "fsn1-dc8",
@ -244,6 +254,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 2,
"name": "fsn1-dc14",
@ -365,6 +385,16 @@ func (m *SDMock) HandleHcloudServers() {
"storage_type": "local",
"cpu_type": "shared"
},
"location": {
"id": 1,
"name": "fsn1",
"description": "Falkenstein DC Park 1",
"country": "DE",
"city": "Falkenstein",
"latitude": 50.47612,
"longitude": 12.370071,
"network_zone": "eu-central"
},
"datacenter": {
"id": 2,
"name": "fsn1-dc14",

View File

@ -34,9 +34,10 @@ import (
)
const (
hetznerRobotLabelPrefix = hetznerLabelPrefix + "robot_"
hetznerLabelRobotProduct = hetznerRobotLabelPrefix + "product"
hetznerLabelRobotCancelled = hetznerRobotLabelPrefix + "cancelled"
hetznerRobotLabelPrefix = hetznerLabelPrefix + "robot_"
hetznerLabelRobotDatacenter = hetznerRobotLabelPrefix + "datacenter"
hetznerLabelRobotProduct = hetznerRobotLabelPrefix + "product"
hetznerLabelRobotCancelled = hetznerRobotLabelPrefix + "cancelled"
)
var userAgent = version.PrometheusUserAgent()
@ -105,14 +106,15 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
targets := make([]model.LabelSet, len(servers))
for i, server := range servers {
labels := model.LabelSet{
hetznerLabelRole: model.LabelValue(HetznerRoleRobot),
hetznerLabelServerID: model.LabelValue(strconv.Itoa(server.Server.ServerNumber)),
hetznerLabelServerName: model.LabelValue(server.Server.ServerName),
hetznerLabelDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)),
hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP),
hetznerLabelServerStatus: model.LabelValue(server.Server.Status),
hetznerLabelRobotProduct: model.LabelValue(server.Server.Product),
hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)),
hetznerLabelRole: model.LabelValue(HetznerRoleRobot),
hetznerLabelServerID: model.LabelValue(strconv.Itoa(server.Server.ServerNumber)),
hetznerLabelServerName: model.LabelValue(server.Server.ServerName),
hetznerLabelDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)), // Label name kept for backward compatibility
hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP),
hetznerLabelServerStatus: model.LabelValue(server.Server.Status),
hetznerLabelRobotDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)),
hetznerLabelRobotProduct: model.LabelValue(server.Server.Product),
hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)),
model.AddressLabel: model.LabelValue(net.JoinHostPort(server.Server.ServerIP, strconv.FormatUint(uint64(d.port), 10))),
}

View File

@ -64,19 +64,21 @@ func TestRobotSDRefresh(t *testing.T) {
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.123"),
"__meta_hetzner_public_ipv6_network": model.LabelValue("2a01:4f8:111:4221::/64"),
"__meta_hetzner_datacenter": model.LabelValue("nbg1-dc1"),
"__meta_hetzner_robot_datacenter": model.LabelValue("nbg1-dc1"),
"__meta_hetzner_robot_product": model.LabelValue("DS 3000"),
"__meta_hetzner_robot_cancelled": model.LabelValue("false"),
},
{
"__address__": model.LabelValue("123.123.123.124:80"),
"__meta_hetzner_role": model.LabelValue("robot"),
"__meta_hetzner_server_id": model.LabelValue("421"),
"__meta_hetzner_server_name": model.LabelValue("server2"),
"__meta_hetzner_server_status": model.LabelValue("in process"),
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.124"),
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_product": model.LabelValue("X5"),
"__meta_hetzner_robot_cancelled": model.LabelValue("true"),
"__address__": model.LabelValue("123.123.123.124:80"),
"__meta_hetzner_role": model.LabelValue("robot"),
"__meta_hetzner_server_id": model.LabelValue("421"),
"__meta_hetzner_server_name": model.LabelValue("server2"),
"__meta_hetzner_server_status": model.LabelValue("in process"),
"__meta_hetzner_public_ipv4": model.LabelValue("123.123.123.124"),
"__meta_hetzner_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_datacenter": model.LabelValue("fsn1-dc10"),
"__meta_hetzner_robot_product": model.LabelValue("X5"),
"__meta_hetzner_robot_cancelled": model.LabelValue("true"),
},
} {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {

View File

@ -59,7 +59,7 @@ The Prometheus monitoring server
| <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, created-timestamp-zero-ingestion, st-storage, concurrent-rule-eval, delayed-compaction, old-ui, otlp-deltatocumulative, promql-duration-expr, use-uncached-io, promql-extended-range-selectors, promql-binop-fill-modifiers, xor2-encoding. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--agent</code> | Run Prometheus in 'Agent mode'. | |
| <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

View File

@ -2238,7 +2238,10 @@ The following meta labels are available on all targets during [relabeling](#rela
* `__meta_hetzner_server_status`: the status of the server
* `__meta_hetzner_public_ipv4`: the public ipv4 address of the server
* `__meta_hetzner_public_ipv6_network`: the public ipv6 network (/64) of the server
* `__meta_hetzner_datacenter`: the datacenter of the server
Note that the `__meta_hetzner_datacenter` label is deprecated for both roles `robot` and `hcloud`:
- For the `robot` role, the replacement label is `__meta_hetzner_robot_datacenter`.
- For the `hcloud` role, the label will be removed after 1 July 2026. For more details, see the [changelog](https://docs.hetzner.cloud/changelog#2025-12-16-phasing-out-datacenters).
The labels below are only available for targets with `role` set to `hcloud`:
@ -2246,8 +2249,10 @@ The labels below are only available for targets with `role` set to `hcloud`:
* `__meta_hetzner_hcloud_image_description`: the description of the server image
* `__meta_hetzner_hcloud_image_os_flavor`: the OS flavor of the server image
* `__meta_hetzner_hcloud_image_os_version`: the OS version of the server image
* `__meta_hetzner_hcloud_datacenter_location`: the location of the server
* `__meta_hetzner_hcloud_datacenter_location_network_zone`: the network zone of the server
* `__meta_hetzner_hcloud_location`: the location of the server
* `__meta_hetzner_hcloud_location_network_zone`: the network zone of the server
* `__meta_hetzner_hcloud_datacenter_location`: the location of the server (deprecated in favor of `__meta_hetzner_hcloud_location`)
* `__meta_hetzner_hcloud_datacenter_location_network_zone`: the network zone of the server (deprecated in favor of `__meta_hetzner_hcloud_location_network_zone`)
* `__meta_hetzner_hcloud_server_type`: the type of the server
* `__meta_hetzner_hcloud_cpu_cores`: the CPU cores count of the server
* `__meta_hetzner_hcloud_cpu_type`: the CPU type of the server (shared or dedicated)
@ -2259,6 +2264,7 @@ The labels below are only available for targets with `role` set to `hcloud`:
The labels below are only available for targets with `role` set to `robot`:
* `__meta_hetzner_robot_datacenter`: the datacenter of the server
* `__meta_hetzner_robot_product`: the product of the server
* `__meta_hetzner_robot_cancelled`: the server cancellation status
@ -3871,9 +3877,9 @@ with this feature.
# or when a compaction completes, whichever comes first.
[ retention: <retention> ] :
# How long to retain samples in storage. If neither this option nor the size option
# is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms.
# is set, the retention time defaults to 15d. Setting this to 0 disables time-based retention.
# This option takes precedence over the deprecated command-line flag --storage.tsdb.retention.time.
[ time: <duration> | default = 15d ]
[ time: <duration> ]
# Maximum number of bytes that can be stored for blocks. A unit is required,
# supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B.

View File

@ -77,6 +77,30 @@ Therefore, when `created-timestamp-zero-ingestion` is enabled Prometheus changes
Besides enabling this feature in Prometheus, start timestamps need to be exposed by the application being scraped.
## Start timestamp (ST) native storage
`--enable-feature=st-storage`
Enables the storage of start timestamps (ST) per sample, through WAL, TSDB/Agent and Remote-Write 2.0. This option
allows preserving the exact ST value as it was presented from scrape and receive protocols. In the future this feature
is meant to be a replacement of `created-timestamp-zero-ingestion` which injects synthetic 0 samples.
Currently, Prometheus supports start timestamps on:
* `PrometheusProto`
* `OpenMetrics1.0.0`
`PrometheusProto` is recommended, due to efficiency of ST passing.
Besides enabling this feature in Prometheus, start timestamps need to be exposed by the application being scraped.
> NOTE: This is an experimental feature with known limitations until fully implemented.
> * It introduces a new WAL record type (SamplesV2) that can only be replayed by Prometheus 3.11 or later versions.
> * For persistent storage support (TSDB blocks), you need to manually opt-in for XOR2 chunk format ([`xor2-encoding` flag](#xor2-chunk-encoding)).
> This might change later once we finish experimentation phase with XOR2.
> * ST for native histograms and NHCBs are not yet implemented (see [#18315](https://github.com/prometheus/prometheus/issues/18315)).
> * PromQL use of ST is out of scope of this feature.
## Concurrent evaluation of independent rules
`--enable-feature=concurrent-rule-eval`
@ -306,6 +330,17 @@ This is currently implemented using direct I/O.
For more details, see the [proposal](https://github.com/prometheus/proposals/pull/45).
## XOR2 chunk encoding
`--enable-feature=xor2-encoding`
> WARNING: This is a highly experimental and risky setting:
> * Chunks encoded with XOR2 **cannot be read by older Prometheus versions** that do not support the encoding. Once enabled and data is written, you need to **manually delete blocks from the disk**, otherwise Prometheus will return an error on all queries.
> * We are still experimenting on the final encoding. As of now this encoding can change in any Prometheus version. All your persistent block data will be lost between versions.
> * This encoding is new, meaning downstream tools and LTS systems might not support it yet (e.g. Thanos sidecar uploaded blocks).
This setting enables the new XOR2 chunk encoding for float samples, which provides better disk compression than the default XOR encoding for typical Prometheus workloads. This format also allows storing the Start Timestamp (ST).
## Extended Range Selectors
`--enable-feature=promql-extended-range-selectors`

View File

@ -219,7 +219,7 @@ to their original value. Histogram samples in the input vector are ignored silen
## `histogram_avg()`
`histogram_avg(v instant-vector)` returns the arithmetic average of observed
values stored in each histogram sample in `v`. Float samples are ignored and do
values stored in each native histogram sample in `v`. Float samples are ignored and do
not show up in the returned vector.
Use `histogram_avg` as demonstrated below to compute the average request duration
@ -236,11 +236,11 @@ Which is equivalent to the following query:
## `histogram_count()` and `histogram_sum()`
`histogram_count(v instant-vector)` returns the count of observations stored in
each histogram sample in `v`. Float samples are ignored and do not show up in
each native histogram sample in `v`. Float samples are ignored and do not show up in
the returned vector.
Similarly, `histogram_sum(v instant-vector)` returns the sum of observations
stored in each histogram sample.
stored in each native histogram sample.
Use `histogram_count` in the following way to calculate a rate of observations
(in this case corresponding to “requests per second”) from a series of
@ -453,14 +453,14 @@ histogram_quantiles(sum(rate(foo[1m])), "quantile", 0.9, 0.99)
## `histogram_stddev()` and `histogram_stdvar()`
`histogram_stddev(v instant-vector)` returns the estimated standard deviation
of observations for each histogram sample in `v`. For this estimation, all observations
of observations for each native histogram sample in `v`. For this estimation, all observations
in a bucket are assumed to have the value of the mean of the bucket boundaries. For
the zero bucket and for buckets with custom boundaries, the arithmetic mean is used.
For the usual exponential buckets, the geometric mean is used. Float samples are ignored
and do not show up in the returned vector.
Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
variance of observations for each histogram sample in `v`.
variance of observations for each native histogram sample in `v`.
## `hour()`

View File

@ -105,7 +105,9 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo
enableCompression: true,
validationScheme: model.UTF8Validation,
symbolTable: labels.NewSymbolTable(),
appendMetadataToWAL: true, // Tests assumes it's enabled, unless explicitly turned off.
// Tests assume those features are enabled, unless explicitly turned off.
appendMetadataToWAL: true,
parseST: true,
}
for _, o := range opts {
o(sl)

View File

@ -115,8 +115,26 @@ type Options struct {
// Option to enable the ingestion of the created timestamp as a synthetic zero sample.
// See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md
//
// NOTE: This option has no effect for AppenderV2 and will be removed with the AppenderV1
// removal.
EnableStartTimestampZeroIngestion bool
// ParseST controls if ST should be parsed and appended from the scrape formats.
// This should be by default true, but it's opt-in for OpenMetrics (OM) 1.0 reasons and might be moved
// to OM 1.0 only flow.
//
// Specifically for OpenMetrics 1.0 flow, it can have some additional effects that might not be desired for non-ST users:
//
// * OpenMetrics 1.0 <metric>_created series will be parsed as ST instead of normal sample. Could be breaking
// if downstream user depends on _created metric. TODO(bwplotka): Add "preserveOMLines" hidden option?
// * Add relatively small (but still) overhead.
// * Can yield wrong ST values in rare edge cases (unknown metadata and metric name collisions).
//
// This only applies to AppenderV2 flow (Prometheus default).
// TODO: Move this option to OM1 parser and use only on OM1 flow.
ParseST bool
// EnableTypeAndUnitLabels represents type-and-unit-labels feature flag.
EnableTypeAndUnitLabels bool

View File

@ -768,6 +768,7 @@ func TestManagerSTZeroIngestion(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: testSTZeroIngest,
ParseST: testSTZeroIngest,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()
@ -954,6 +955,7 @@ func TestManagerSTZeroIngestionHistogram(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: tc.enableSTZeroIngestion,
ParseST: tc.enableSTZeroIngestion,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()
@ -1066,6 +1068,7 @@ func TestNHCBAndSTZeroIngestion(t *testing.T) {
app := teststorage.NewAppendable()
discoveryManager, scrapeManager := runManagers(t, ctx, &Options{
EnableStartTimestampZeroIngestion: true,
ParseST: true,
skipJitterOffsetting: true,
}, app, nil)
defer scrapeManager.Stop()

View File

@ -870,6 +870,7 @@ type scrapeLoop struct {
// Options from scrape.Options.
enableSTZeroIngestion bool
parseST bool // Used by AppenderV2 only.
enableTypeAndUnitLabels bool
reportExtraMetrics bool
appendMetadataToWAL bool
@ -1224,7 +1225,12 @@ func newScrapeLoop(opts scrapeLoopOptions) *scrapeLoop {
validationScheme: opts.sp.config.MetricNameValidationScheme,
// scrape.Options.
enableSTZeroIngestion: opts.sp.options.EnableStartTimestampZeroIngestion,
enableSTZeroIngestion: opts.sp.options.EnableStartTimestampZeroIngestion,
// parseST was added recently. Before EnableStartTimestampZeroIngestion
// was enabling parsing ST. For non-Prometheus users of the scrape
// manager, we ensure appenderV2 parseST is set on EnableStartTimestampZeroIngestion
// This will be removed when EnableStartTimestampZeroIngestion is removed.
parseST: opts.sp.options.ParseST || opts.sp.options.EnableStartTimestampZeroIngestion,
enableTypeAndUnitLabels: opts.sp.options.EnableTypeAndUnitLabels,
appendMetadataToWAL: opts.sp.options.AppendMetadata,
passMetadataInContext: opts.sp.options.PassMetadataInContext,
@ -1253,9 +1259,8 @@ func (sl *scrapeLoop) getScrapeOffset() time.Duration {
func (sl *scrapeLoop) run(errc chan<- error) {
var (
last time.Time
alignedScrapeTime = time.Now().Round(0)
ticker = time.NewTicker(sl.interval)
last time.Time
ticker = time.NewTicker(sl.interval)
)
defer func() {
if sl.scrapeOnShutdown {
@ -1282,6 +1287,10 @@ func (sl *scrapeLoop) run(errc chan<- error) {
}
}
// Reset the ticker so target scrape times are aligned to the offset+intervals.
ticker.Reset(sl.interval)
alignedScrapeTime := time.Now().Round(0)
for {
select {
case <-sl.ctx.Done():

View File

@ -102,7 +102,7 @@ func (sl *scrapeLoopAppenderV2) append(b []byte, contentType string, ts time.Tim
IgnoreNativeHistograms: !sl.enableNativeHistogramScraping,
ConvertClassicHistogramsToNHCB: sl.convertClassicHistToNHCB,
KeepClassicOnClassicAndNativeHistograms: sl.alwaysScrapeClassicHist,
OpenMetricsSkipSTSeries: sl.enableSTZeroIngestion,
OpenMetricsSkipSTSeries: sl.parseST,
FallbackContentType: sl.fallbackScrapeProtocol,
})
if p == nil {
@ -254,7 +254,7 @@ loop:
}
st := int64(0)
if sl.enableSTZeroIngestion {
if sl.parseST {
// p.StartTimestamp() tend to be expensive (e.g. OM1). Do it only if we care.
st = p.StartTimestamp()
}

View File

@ -24,6 +24,7 @@ import (
"log/slog"
"maps"
"math"
"net"
"net/http"
"net/http/httptest"
"net/url"
@ -51,6 +52,7 @@ import (
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.uber.org/atomic"
"go.uber.org/goleak"
"go.yaml.in/yaml/v2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -69,6 +71,7 @@ import (
"github.com/prometheus/prometheus/util/pool"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil"
"github.com/prometheus/prometheus/util/testutil/synctest"
)
func TestMain(m *testing.M) {
@ -1546,6 +1549,14 @@ func TestPromTextToProto(t *testing.T) {
require.Equal(t, "promhttp_metric_handler_requests_total", got[236])
}
// seriesPerHistogramFor100HistsWithExemplars returns the number of series each
// test histogram expands into for the 100-histograms-with-exemplars fixture.
//
// With AppenderV2, parseST consumes the _created line as the start timestamp
// instead of appending it as a regular sample, so one fewer series is stored.
func seriesPerHistogramFor100HistsWithExemplars(appV2 bool) int {
	const (
		seriesWithSTParsing    = 23 // _created line becomes ST, not a sample.
		seriesWithoutSTParsing = 24
	)
	if !appV2 {
		return seriesWithoutSTParsing
	}
	return seriesWithSTParsing
}
// TestScrapeLoopAppend_WithStorage tests appends and storage integration for the
// large input files that are also used in benchmarks.
func TestScrapeLoopAppend_WithStorage(t *testing.T) {
@ -1631,8 +1642,13 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
name: "100HistsWithExemplars",
parsableText: makeTestHistogramsWithExemplars(100),
expectedSamplesLen: 24 * 100,
expectedSamplesLen: seriesPerHistogramFor100HistsWithExemplars(appV2) * 100,
testAppendedSamples: func(t *testing.T, committed []sample) {
st := int64(0)
if appV2 {
st = 1726839813016
}
// Verify a few samples.
m := metadata.Metadata{Type: model.MetricTypeHistogram, Help: "RPC latency distributions."}
testutil.RequireEqual(t, sample{
@ -1642,7 +1658,7 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
}
return "rpc_durations_histogram0_seconds"
}(),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), V: 15, T: timestamp.FromTime(ts),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"), V: 15, ST: st, T: timestamp.FromTime(ts),
ES: []exemplar.Exemplar{
{Labels: labels.FromStrings("dummyID", "9818"), Value: 0.0002791130914009552, Ts: 1726839814982, HasTs: true},
},
@ -1654,17 +1670,24 @@ func TestScrapeLoopAppend_WithStorage(t *testing.T) {
}
return "rpc_durations_histogram49_seconds"
}(),
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram49_seconds_sum"), V: -8.452185437166741e-05, T: timestamp.FromTime(ts),
}, committed[24*50-3])
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram49_seconds_sum"), V: -8.452185437166741e-05, ST: st, T: timestamp.FromTime(ts),
}, committed[seriesPerHistogramFor100HistsWithExemplars(appV2)*49+21])
// This series does not have metadata, nor metric family, because of isSeriesPartOfFamily bug and OpenMetric 1.0 limitations around _created series.
// TODO(bwplotka): Fix with https://github.com/prometheus/prometheus/issues/17900
testutil.RequireEqual(t, sample{
L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_created"), V: 1.726839813016302e+09, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
if !appV2 {
// This series does not have metadata, nor metric family, because of isSeriesPartOfFamily bug and OpenMetric 1.0 limitations around _created series.
// TODO(bwplotka): Fix with https://github.com/prometheus/prometheus/issues/17900
testutil.RequireEqual(t, sample{
L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_created"), V: 1.726839813016302e+09, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
} else {
testutil.RequireEqual(t, sample{
MF: "rpc_durations_histogram99_seconds",
M: m, L: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram99_seconds_count"), V: 15, ST: st, T: timestamp.FromTime(ts),
}, committed[len(committed)-1])
}
},
testExemplars: func(t *testing.T, er []exemplar.QueryResult) {
// 12 out of 24 histogram series have exemplars.
// 12 out of 23/24 histogram series have exemplars.
require.Len(t, er, 12*100)
testutil.RequireEqual(t, exemplar.QueryResult{
SeriesLabels: labels.FromStrings(model.MetricNameLabel, "rpc_durations_histogram0_seconds_bucket", "le", "0.0003100000000000002"),
@ -2901,6 +2924,11 @@ func TestScrapeLoopAppend(t *testing.T) {
}
func testScrapeLoopAppend(t *testing.T, appV2 bool) {
st := int64(0)
if appV2 {
st = 111111001
}
for _, test := range []struct {
title string
alwaysScrapeClassicHist bool
@ -2953,6 +2981,32 @@ func testScrapeLoopAppend(t *testing.T, appV2 bool) {
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("a", "abc"), Value: 1, Ts: 10000000, HasTs: true}},
}},
},
{
title: "Metric with ST",
scrapeText: `# TYPE metric counter
metric_total{n="1"} 1.1
metric_created{n="1"} 9999.999
# EOF`,
contentType: "application/openmetrics-text",
samples: func() []sample {
if !appV2 {
return []sample{
{
L: labels.FromStrings("__name__", "metric_total", "n", "1"),
V: 1.1,
},
{
L: labels.FromStrings("__name__", "metric_created", "n", "1"),
V: 9999.999,
},
}
}
return []sample{{
L: labels.FromStrings("__name__", "metric_total", "n", "1"),
ST: 9999999, V: 1.1,
}}
}(),
},
{
title: "Two metrics and exemplars",
scrapeText: `metric_total{n="1"} 1 # {t="1"} 1.0 10000
@ -2970,7 +3024,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000
}},
},
{
title: "Native histogram with three exemplars from classic buckets",
title: "Native histogram with ST and three exemplars from classic buckets",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -2978,6 +3032,10 @@ help: "Test histogram with many buckets removed to keep it manageable in size."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
bucket: <
@ -3060,8 +3118,9 @@ metric: <
`,
contentType: "application/vnd.google.protobuf",
samples: []sample{{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3087,7 +3146,7 @@ metric: <
}},
},
{
title: "Native histogram with three exemplars scraped as classic histogram",
title: "Native histogram with ST and three exemplars scraped as classic histogram",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -3095,6 +3154,10 @@ help: "Test histogram with many buckets removed to keep it manageable in size."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
bucket: <
@ -3179,8 +3242,9 @@ metric: <
contentType: "application/vnd.google.protobuf",
samples: []sample{
{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3205,26 +3269,26 @@ metric: <
{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true},
},
},
{L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2},
{L: labels.FromStrings("__name__", "test_histogram_count"), ST: st, T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), ST: st, T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), ST: st, T: 1234568, V: 2},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), ST: st, T: 1234568, V: 4,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}},
},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), ST: st, T: 1234568, V: 16,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}},
},
{
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32,
L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), ST: st, T: 1234568, V: 32,
ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}},
},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), ST: st, T: 1234568, V: 175},
},
},
{
title: "Native histogram with exemplars and no classic buckets",
title: "Native histogram with ST, exemplars and no classic buckets",
contentType: "application/vnd.google.protobuf",
enableNativeHistogramsIngestion: true,
scrapeText: `name: "test_histogram"
@ -3232,6 +3296,10 @@ help: "Test histogram."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
schema: 3
@ -3297,8 +3365,9 @@ metric: <
`,
samples: []sample{{
T: 1234568,
L: labels.FromStrings("__name__", "test_histogram"),
T: 1234568,
ST: st,
L: labels.FromStrings("__name__", "test_histogram"),
H: &histogram.Histogram{
Count: 175,
ZeroCount: 2,
@ -3324,7 +3393,7 @@ metric: <
}},
},
{
title: "Native histogram with exemplars but ingestion disabled",
title: "Native histogram with ST, exemplars but ingestion disabled",
contentType: "application/vnd.google.protobuf",
enableNativeHistogramsIngestion: false,
scrapeText: `name: "test_histogram"
@ -3332,6 +3401,10 @@ help: "Test histogram."
type: HISTOGRAM
metric: <
histogram: <
created_timestamp: <
seconds: 111111
nanos: 1000000
>
sample_count: 175
sample_sum: 0.0008280461746287094
schema: 3
@ -3397,9 +3470,9 @@ metric: <
`,
samples: []sample{
{L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_count"), ST: st, T: 1234568, V: 175},
{L: labels.FromStrings("__name__", "test_histogram_sum"), ST: st, T: 1234568, V: 0.0008280461746287094},
{L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), ST: st, T: 1234568, V: 175},
},
},
} {
@ -3421,7 +3494,7 @@ metric: <
// This test does not care about metadata.
// Having this true would mean we need to add metadata to sample
// expectations.
// TODO(bwplotka): Add cases for append metadata to WAL and pass metadata
// TODO(bwplotka): Add cases for append metadata to WAL and pass metadata.
sl.appendMetadataToWAL = false
})
app := sl.appender()
@ -6786,3 +6859,100 @@ func TestScrapePoolSetScrapeFailureLoggerRace(t *testing.T) {
wg.Wait()
}
// TestScrapeOffsetDistribution verifies that targets in the same scrape pool
// are scraped at staggered offsets within the scrape interval rather than all
// at once. It runs under synctest for a deterministic virtual clock and uses
// an in-memory pipe listener so no real network is involved.
func TestScrapeOffsetDistribution(t *testing.T) {
	interval := 5 * time.Second

	synctest.Test(t, func(t *testing.T) {
		startTime := time.Now()
		listener := newPipeListener()

		// Record, per target path, the offsets (relative to test start) at
		// which each scrape request arrived.
		var mu sync.Mutex
		scrapeTimes := make(map[string][]time.Duration)
		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			select {
			case <-r.Context().Done():
				return
			default:
				mu.Lock()
				target := r.URL.Path
				scrapeTimes[target] = append(scrapeTimes[target], time.Since(startTime))
				mu.Unlock()
				w.Header().Set("Content-Type", "text/plain; version=0.0.4")
				fmt.Fprintln(w, "expected_metric 1")
			}
		})

		srv := httptest.NewUnstartedServer(handler)
		srv.Listener = listener
		srv.Start()
		t.Cleanup(srv.Close)

		app := teststorage.NewAppendable()
		opts := &Options{
			HTTPClientOptions: []config_util.HTTPClientOption{
				config_util.WithDialContextFunc(func(ctx context.Context, _, _ string) (net.Conn, error) {
					srvConn, cliConn := net.Pipe()
					select {
					case listener.conns <- srvConn:
						return cliConn, nil
					case <-listener.closed:
						return nil, net.ErrClosed
					case <-ctx.Done():
						return nil, ctx.Err()
					}
				}),
			},
		}

		scrapeManager, err := NewManager(opts, promslog.NewNopLogger(), nil, app, nil, prometheus.NewRegistry())
		// Check the error before touching the manager: on failure NewManager
		// returns a nil manager and the field write below would panic.
		require.NoError(t, err)
		scrapeManager.offsetSeed = 1 // Set a fixed offset seed for deterministic testing.

		var targets []model.LabelSet
		for i := range 5 {
			targets = append(targets, model.LabelSet{
				model.SchemeLabel:      "http",
				model.AddressLabel:     model.LabelValue(fmt.Sprintf("target-%d.local", i)),
				model.MetricsPathLabel: model.LabelValue(fmt.Sprintf("/metrics/%d", i)),
			})
		}
		scrapeManager.updateTsets(map[string][]*targetgroup.Group{
			"test": {{Targets: targets}},
		})

		cfg := &config.Config{
			GlobalConfig: config.GlobalConfig{
				ScrapeInterval:  model.Duration(interval),
				ScrapeTimeout:   model.Duration(interval),
				ScrapeProtocols: []config.ScrapeProtocol{config.PrometheusProto},
			},
			ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test"}},
		}
		// Round-trip the config through YAML so defaults are applied the same
		// way as for a configuration loaded from disk.
		cfgText, err := yaml.Marshal(*cfg)
		require.NoError(t, err)
		cfg = loadConfiguration(t, string(cfgText))
		require.NoError(t, scrapeManager.ApplyConfig(cfg))
		scrapeManager.reload()

		// Let several full scrape intervals elapse on the virtual clock.
		numScrapes := 4
		time.Sleep((time.Duration(numScrapes) * interval) + time.Second)
		synctest.Wait()
		scrapeManager.Stop()

		// For each scrape round, the recorded offsets should differ between
		// targets: more than two unique timestamps means the scrapes are
		// staggered rather than simultaneous.
		for i := range numScrapes {
			uniqueTimes := make(map[time.Duration]struct{})
			for _, times := range scrapeTimes {
				if i < len(times) {
					uniqueTimes[times[i]] = struct{}{}
				}
			}
			require.Greater(t, len(uniqueTimes), 2, "Expected targets to be scraped at staggered offsets rather than simultaneously at scrape index %d", i)
		}
	})
}

View File

@ -761,11 +761,12 @@ outer:
default:
}
if t.shards.enqueue(s.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
timestamp: s.T,
value: s.V,
sType: tSample,
seriesLabels: lbls,
metadata: meta,
startTimestamp: s.ST,
timestamp: s.T,
value: s.V,
sType: tSample,
}) {
continue outer
}
@ -883,9 +884,10 @@ outer:
if t.shards.enqueue(h.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
timestamp: h.T,
histogram: h.H,
sType: tHistogram,
// TODO(bwplotka): Populate ST once histogram Ref has it.
timestamp: h.T,
histogram: h.H,
sType: tHistogram,
}) {
continue outer
}
@ -942,8 +944,9 @@ outer:
default:
}
if t.shards.enqueue(h.Ref, timeSeries{
seriesLabels: lbls,
metadata: meta,
seriesLabels: lbls,
metadata: meta,
// TODO(bwplotka): Populate ST once histogram Ref has it.
timestamp: h.T,
floatHistogram: h.FH,
sType: tFloatHistogram,
@ -1397,13 +1400,13 @@ type queue struct {
}
type timeSeries struct {
seriesLabels labels.Labels
value float64
histogram *histogram.Histogram
floatHistogram *histogram.FloatHistogram
metadata *metadata.Metadata
timestamp int64
exemplarLabels labels.Labels
seriesLabels labels.Labels
value float64
histogram *histogram.Histogram
floatHistogram *histogram.FloatHistogram
metadata *metadata.Metadata
startTimestamp, timestamp int64
exemplarLabels labels.Labels
// The type of series: sample, exemplar, or histogram.
sType seriesType
}
@ -1994,8 +1997,9 @@ func populateV2TimeSeries(symbolTable *writev2.SymbolsTable, batch []timeSeries,
switch d.sType {
case tSample:
pendingData[nPending].Samples = append(pendingData[nPending].Samples, writev2.Sample{
Value: d.value,
Timestamp: d.timestamp,
Value: d.value,
Timestamp: d.timestamp,
StartTimestamp: d.startTimestamp,
})
nPendingSamples++
case tExemplar:
@ -2006,9 +2010,11 @@ func populateV2TimeSeries(symbolTable *writev2.SymbolsTable, batch []timeSeries,
})
nPendingExemplars++
case tHistogram:
// TODO(bwplotka): Extend with ST once histograms populate it.
pendingData[nPending].Histograms = append(pendingData[nPending].Histograms, writev2.FromIntHistogram(d.timestamp, d.histogram))
nPendingHistograms++
case tFloatHistogram:
// TODO(bwplotka): Extend with ST once histograms populate it.
pendingData[nPending].Histograms = append(pendingData[nPending].Histograms, writev2.FromFloatHistogram(d.timestamp, d.floatHistogram))
nPendingHistograms++
case tMetadata:

View File

@ -143,7 +143,10 @@ func TestBasicContentNegotiation(t *testing.T) {
s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false)
defer s.Close()
recs := testwal.GenerateRecords(recCase{Series: 1, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: tc.senderProtoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 1, SamplesPerSeries: 1,
})
conf.RemoteWriteConfigs[0].ProtobufMessage = tc.senderProtoMsg
require.NoError(t, s.ApplyConfig(conf))
@ -225,6 +228,7 @@ func TestSampleDelivery(t *testing.T) {
s := NewStorage(nil, nil, nil, dir, defaultFlushDeadline, nil, false)
defer s.Close()
rc.NoST = protoMsg == remoteapi.WriteV1MessageType // RW1 does not support ST.
recs := testwal.GenerateRecords(rc)
var (
@ -388,7 +392,10 @@ func TestSampleDeliveryTimeout(t *testing.T) {
t.Parallel()
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 10, SamplesPerSeries: 10})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 10, SamplesPerSeries: 10,
})
cfg := testDefaultQueueConfig()
mcfg := config.DefaultMetadataConfig
cfg.MaxShards = 1
@ -417,7 +424,10 @@ func TestSampleDeliveryOrder(t *testing.T) {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
ts := 10
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
recs := testwal.GenerateRecords(recCase{Series: n, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: n, SamplesPerSeries: 1,
})
c, m := newTestClientAndQueueManager(t, defaultFlushDeadline, protoMsg)
c.expectSamples(recs.Samples, recs.Series)
@ -446,7 +456,10 @@ func TestShutdown(t *testing.T) {
m := newTestQueueManager(t, cfg, mcfg, deadline, c, protoMsg)
// Send 2x batch size, so we know it will need at least two sends.
n := 2 * config.DefaultQueueConfig.MaxSamplesPerSend
recs := testwal.GenerateRecords(recCase{Series: n / 1000, SamplesPerSeries: 1000})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: n / 1000, SamplesPerSeries: 1000,
})
m.StoreSeries(recs.Series, 0)
m.Start()
@ -515,7 +528,10 @@ func TestReshard(t *testing.T) {
size := 10 // Make bigger to find more races.
nSeries := 6
samplesPerSeries := config.DefaultQueueConfig.Capacity * size
recs := testwal.GenerateRecords(recCase{Series: nSeries, SamplesPerSeries: samplesPerSeries})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: nSeries, SamplesPerSeries: samplesPerSeries,
})
t.Logf("about to send %v samples", len(recs.Samples))
cfg := config.DefaultQueueConfig
@ -591,7 +607,10 @@ func TestReshardPartialBatch(t *testing.T) {
t.Parallel()
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 1, SamplesPerSeries: 10})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 1, SamplesPerSeries: 10,
})
c := NewTestBlockedWriteClient()
@ -636,7 +655,10 @@ func TestReshardPartialBatch(t *testing.T) {
func TestQueueFilledDeadlock(t *testing.T) {
for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
recs := testwal.GenerateRecords(recCase{Series: 50, SamplesPerSeries: 1})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: 50, SamplesPerSeries: 1,
})
c := NewNopWriteClient()
@ -835,8 +857,8 @@ func getSeriesIDFromRef(r record.RefSeries) string {
// TestWriteClient represents write client which does not call remote storage,
// but instead re-implements fake WriteHandler for test purposes.
type TestWriteClient struct {
receivedSamples map[string][]prompb.Sample
expectedSamples map[string][]prompb.Sample
receivedSamples map[string][]writev2.Sample
expectedSamples map[string][]writev2.Sample
receivedExemplars map[string][]prompb.Exemplar
expectedExemplars map[string][]prompb.Exemplar
receivedHistograms map[string][]prompb.Histogram
@ -860,8 +882,8 @@ type TestWriteClient struct {
// NewTestWriteClient creates a new testing write client.
func NewTestWriteClient(protoMsg remoteapi.WriteMessageType) *TestWriteClient {
return &TestWriteClient{
receivedSamples: map[string][]prompb.Sample{},
expectedSamples: map[string][]prompb.Sample{},
receivedSamples: map[string][]writev2.Sample{},
expectedSamples: map[string][]writev2.Sample{},
receivedMetadata: map[string][]prompb.MetricMetadata{},
expectedMetadata: map[string][]prompb.MetricMetadata{},
protoMsg: protoMsg,
@ -876,18 +898,20 @@ func (c *TestWriteClient) injectErrors(injectedErrs []error) {
c.retry = false
}
// expectSamples injects samples that will be expected on waitForExpectedData.
func (c *TestWriteClient) expectSamples(ss []record.RefSample, series []record.RefSeries) {
c.mtx.Lock()
defer c.mtx.Unlock()
c.expectedSamples = map[string][]prompb.Sample{}
c.receivedSamples = map[string][]prompb.Sample{}
c.expectedSamples = map[string][]writev2.Sample{}
c.receivedSamples = map[string][]writev2.Sample{}
for _, s := range ss {
tsID := getSeriesIDFromRef(series[s.Ref])
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], prompb.Sample{
Timestamp: s.T,
Value: s.V,
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], writev2.Sample{
StartTimestamp: s.ST,
Timestamp: s.T,
Value: s.V,
})
}
}
@ -1065,7 +1089,10 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
}
}
var reqProto *prompb.WriteRequest
var (
reqProto *prompb.WriteRequest
reqProtoV2 *writev2.Request
)
switch c.protoMsg {
case remoteapi.WriteV1MessageType:
reqProto = &prompb.WriteRequest{}
@ -1073,10 +1100,10 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
case remoteapi.WriteV2MessageType:
// NOTE(bwplotka): v1 msg can be unmarshaled to v2 sometimes, without
// errors.
var reqProtoV2 writev2.Request
err = proto.Unmarshal(reqBuf, &reqProtoV2)
reqProtoV2 = &writev2.Request{}
err = proto.Unmarshal(reqBuf, reqProtoV2)
if err == nil {
reqProto, err = v2RequestToWriteRequest(&reqProtoV2)
reqProto, err = v2RequestToWriteRequest(reqProtoV2)
}
}
if err != nil {
@ -1085,11 +1112,21 @@ func (c *TestWriteClient) Store(_ context.Context, req []byte, _ int) (WriteResp
rs := WriteResponseStats{}
b := labels.NewScratchBuilder(0)
for _, ts := range reqProto.Timeseries {
for i, ts := range reqProto.Timeseries {
labels := ts.ToLabels(&b, nil)
tsID := labels.String()
if len(ts.Samples) > 0 {
c.receivedSamples[tsID] = append(c.receivedSamples[tsID], ts.Samples...)
for j, s := range ts.Samples {
st := int64(0)
if reqProtoV2 != nil {
// TODO(bwplotka): Refactor queue manager TestWriteClient for tighter validation
// and native support for new RW2 features. For now we inject STs in RW2 case to the existing test suite.
st = reqProtoV2.Timeseries[i].Samples[j].StartTimestamp
}
c.receivedSamples[tsID] = append(c.receivedSamples[tsID], writev2.Sample{
StartTimestamp: st,
Timestamp: s.Timestamp,
Value: s.Value,
})
}
rs.Samples += len(ts.Samples)
@ -1265,6 +1302,13 @@ var extraLabels []labels.Label = []labels.Label{
{Name: "pod_name", Value: "some-other-name-5j8s8"},
}
// Recommended CLI invocation(s):
/*
export bench=sampleSend && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkSampleSend' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkSampleSend(b *testing.B) {
// Send one sample per series, which is the typical remote_write case
const numSamples = 1
@ -1771,6 +1815,13 @@ func createDummyTimeSeries(instances int) []timeSeries {
return result
}
// Recommended CLI invocation(s):
/*
export bench=buildWriteRequest && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkBuildWriteRequest' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkBuildWriteRequest(b *testing.B) {
noopLogger := promslog.NewNopLogger()
bench := func(b *testing.B, batch []timeSeries) {
@ -1811,6 +1862,13 @@ func BenchmarkBuildWriteRequest(b *testing.B) {
})
}
// Recommended CLI invocation(s):
/*
export bench=buildV2WriteRequest && go test ./storage/remote/... \
-run '^$' -bench '^BenchmarkBuildV2WriteRequest' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m -benchmem \
| tee ${bench}.txt
*/
func BenchmarkBuildV2WriteRequest(b *testing.B) {
noopLogger := promslog.NewNopLogger()
bench := func(b *testing.B, batch []timeSeries) {
@ -1860,7 +1918,9 @@ func TestDropOldTimeSeries(t *testing.T) {
size := 10
nSeries := 6
nSamples := config.DefaultQueueConfig.Capacity * size
noST := protoMsg == remoteapi.WriteV1MessageType // RW1
pastRecs := testwal.GenerateRecords(recCase{
NoST: noST,
Series: nSeries,
SamplesPerSeries: (nSamples / nSeries) / 2, // Half data is past.
TsFn: func(_, j int) int64 {
@ -1869,6 +1929,7 @@ func TestDropOldTimeSeries(t *testing.T) {
},
})
newRecs := testwal.GenerateRecords(recCase{
NoST: noST,
Series: nSeries,
SamplesPerSeries: (nSamples / nSeries) / 2, // Half data is past.
TsFn: func(_, j int) int64 {
@ -1943,6 +2004,7 @@ func TestSendSamplesWithBackoffWithSampleAgeLimit(t *testing.T) {
r := rand.New(rand.NewSource(99))
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: numberOfSeries,
SamplesPerSeries: 1,
TsFn: func(_, _ int) int64 {
@ -1967,9 +2029,10 @@ func TestSendSamplesWithBackoffWithSampleAgeLimit(t *testing.T) {
if !shouldBeDropped {
for _, s := range recs.Samples {
tsID := getSeriesIDFromRef(recs.Series[s.Ref])
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], prompb.Sample{
Timestamp: s.T,
Value: s.V,
c.expectedSamples[tsID] = append(c.expectedSamples[tsID], writev2.Sample{
StartTimestamp: s.ST,
Timestamp: s.T,
Value: s.V,
})
}
}
@ -2490,7 +2553,10 @@ func TestHighestTimestampOnAppend(t *testing.T) {
t.Run(fmt.Sprint(protoMsg), func(t *testing.T) {
nSamples := 11 * config.DefaultQueueConfig.Capacity
nSeries := 3
recs := testwal.GenerateRecords(recCase{Series: nSeries, SamplesPerSeries: nSamples / nSeries})
recs := testwal.GenerateRecords(recCase{
NoST: protoMsg == remoteapi.WriteV1MessageType, // RW1 does not support ST.
Series: nSeries, SamplesPerSeries: nSamples / nSeries,
})
_, m := newTestClientAndQueueManager(t, defaultFlushDeadline, protoMsg)
m.Start()

View File

@ -341,11 +341,14 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
i := 0
seriesIter := s.Series.Iterator(nil)
lastType := chunkenc.ValNone
lastHadST := false
for typ := seriesIter.Next(); typ != chunkenc.ValNone; typ = seriesIter.Next() {
if typ != lastType || i >= seriesToChunkEncoderSplit {
st := seriesIter.AtST()
hasST := st != 0
if typ != lastType || lastHadST != hasST || i >= seriesToChunkEncoderSplit {
// Create a new chunk if the sample type changed or too many samples in the current one.
chks = appendChunk(chks, mint, maxt, chk)
chk, err = chunkenc.NewEmptyChunk(typ.ChunkEncoding())
chk, err = typ.NewChunk(hasST)
if err != nil {
return errChunksIterator{err: err}
}
@ -358,21 +361,20 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
i = 0
}
lastType = typ
lastHadST = hasST
var (
st, t int64
v float64
h *histogram.Histogram
fh *histogram.FloatHistogram
t int64
v float64
h *histogram.Histogram
fh *histogram.FloatHistogram
)
switch typ {
case chunkenc.ValFloat:
t, v = seriesIter.At()
st = seriesIter.AtST()
app.Append(st, t, v)
case chunkenc.ValHistogram:
t, h = seriesIter.AtHistogram(nil)
st = seriesIter.AtST()
newChk, recoded, app, err = app.AppendHistogram(nil, st, t, h, false)
if err != nil {
return errChunksIterator{err: err}
@ -388,7 +390,6 @@ func (s *seriesToChunkEncoder) Iterator(it chunks.Iterator) chunks.Iterator {
}
case chunkenc.ValFloatHistogram:
t, fh = seriesIter.AtFloatHistogram(nil)
st = seriesIter.AtST()
newChk, recoded, app, err = app.AppendFloatHistogram(nil, st, t, fh, false)
if err != nil {
return errChunksIterator{err: err}

View File

@ -95,7 +95,9 @@ type Options struct {
// EnableSTStorage determines whether agent DB should write a Start Timestamp (ST)
// per sample to WAL.
// TODO(bwplotka): Implement this option as per PROM-60, currently it's noop.
// Controlled by the `--enable-feature=st-storage` CLI flag; when enabled, ST is
// persisted to the WAL for samples that include a non-zero start timestamp in
// supported record types.
EnableSTStorage bool
}
@ -490,7 +492,7 @@ func (db *DB) loadWAL(r *wlog.Reader, duplicateRefToValidRef map[chunks.HeadSeri
return
}
decoded <- series
case record.Samples:
case record.Samples, record.SamplesV2:
samples := db.walReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(rec, samples)
if err != nil {
@ -750,7 +752,7 @@ func (db *DB) truncate(mint int64) error {
db.metrics.checkpointCreationTotal.Inc()
if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint); err != nil {
if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint, db.opts.EnableSTStorage); err != nil {
db.metrics.checkpointCreationFail.Inc()
var cerr *wlog.CorruptionErr
if errors.As(err, &cerr) {
@ -1196,7 +1198,7 @@ func (a *appenderBase) log() error {
a.mtx.RLock()
defer a.mtx.RUnlock()
var encoder record.Encoder
encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage}
buf := a.bufPool.Get().([]byte)
defer func() {
a.bufPool.Put(buf) //nolint:staticcheck
@ -1320,7 +1322,7 @@ func (a *appenderBase) logSeries() error {
a.bufPool.Put(buf) //nolint:staticcheck
}()
var encoder record.Encoder
encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage}
buf = encoder.Series(a.pendingSeries, buf)
if err := a.wal.Log(buf); err != nil {
return err

View File

@ -72,7 +72,6 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
lastTS := s.lastTs
s.Unlock()
// TODO(bwplotka): Handle ST natively (as per PROM-60).
if a.opts.EnableSTAsZeroSample && st != 0 {
a.bestEffortAppendSTZeroSample(s, ls, lastTS, st, t, h, fh)
}
@ -86,6 +85,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
case fh != nil:
isStale = value.IsStaleNaN(fh.Sum)
// NOTE: always modify pendingFloatHistograms and floatHistogramSeries together
// TODO(krajorama,ywwg,bwplotka): Pass ST when available in WAL.
a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{
Ref: s.ref,
T: t,
@ -95,6 +95,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
case h != nil:
isStale = value.IsStaleNaN(h.Sum)
// NOTE: always modify pendingHistograms and histogramSeries together
// TODO(krajorama,ywwg,bwplotka): Pass ST when available in WAL.
a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{
Ref: s.ref,
T: t,
@ -107,6 +108,7 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64
// NOTE: always modify pendingSamples and sampleSeries together.
a.pendingSamples = append(a.pendingSamples, record.RefSample{
Ref: s.ref,
ST: st,
T: t,
V: v,
})

View File

@ -18,6 +18,7 @@ import (
"fmt"
"math"
"path/filepath"
"strconv"
"testing"
"time"
@ -89,278 +90,301 @@ func TestDB_InvalidSeries_AppendV2(t *testing.T) {
})
}
// TestCommit_AppendV2 tests Appender commit.
// TODO(bwplotka): Rewrite this so Refs are generated, then appended, then expected so we test the
// exact data durability.
func TestCommit_AppendV2(t *testing.T) {
const (
numDatapoints = 1000
numHistograms = 100
numSeries = 8
)
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.EnableSTStorage = enableSTStorage
s := createTestAgentDB(t, nil, opts)
s := createTestAgentDB(t, nil, DefaultOptions())
app := s.AppenderV2(context.TODO())
var (
expectedSampleSTs []int64
gotSampleSTs []int64
)
if enableSTStorage {
expectedSampleSTs = make([]int64, 0, numSeries*numDatapoints)
gotSampleSTs = make([]int64, 0, numSeries*numDatapoints)
}
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
app := s.AppenderV2(t.Context())
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{
Exemplars: []exemplar.Exemplar{{
Labels: lset,
Ts: sample[0].T() + int64(i),
Value: sample[0].F(),
HasTs: true,
}},
})
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
st := int64(i + 1234)
_, err := app.Append(0, lset, st, sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{
Exemplars: []exemplar.Exemplar{{
Labels: lset,
Ts: sample[0].T() + int64(i) + 2000,
Value: sample[0].F(),
HasTs: true,
}},
})
require.NoError(t, err)
if enableSTStorage {
expectedSampleSTs = append(expectedSampleSTs, st)
}
}
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+2234), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+3234), int64(i+2000), 0, customBucketHistograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+4234), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i+5234), int64(i+2000), 0, nil, customBucketFloatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
}
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series, samples, and exemplars.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
if enableSTStorage {
t.Errorf("Got V1 Samples when ST enabled")
}
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.SamplesV2:
if !enableSTStorage {
t.Errorf("Got V2 Samples when ST disabled")
}
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
for _, s := range samples {
gotSampleSTs = append(gotSampleSTs, s.ST)
}
walSamplesCount += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
default:
}
}
// Check that the WAL contained the same number of committed series/samples/exemplars.
require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series")
require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples")
require.Equal(t, expectedSampleSTs, gotSampleSTs, "unexpected STs received")
require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars")
require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms")
require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms")
// Check that we can still create both kinds of Appender.
// Regression test against https://github.com/prometheus/prometheus/issues/17800.
_ = s.Appender(t.Context())
_ = s.AppenderV2(t.Context())
})
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, customBucketHistograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, customBucketFloatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series, samples, and exemplars.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
default:
}
}
// Check that the WAL contained the same number of committed series/samples/exemplars.
require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series")
require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples")
require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars")
require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms")
require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms")
// Check that we can still create both kinds of Appender - see https://github.com/prometheus/prometheus/issues/17800.
_ = s.Appender(context.TODO())
_ = s.AppenderV2(context.TODO())
}
func TestRollback_AppendV2(t *testing.T) {
func TestRollbackAppendV2(t *testing.T) {
const (
numDatapoints = 1000
numHistograms = 100
numSeries = 8
)
s := createTestAgentDB(t, nil, DefaultOptions())
app := s.AppenderV2(context.TODO())
for _, enableSTStorage := range []bool{false, true} {
opts := DefaultOptions()
opts.EnableSTStorage = enableSTStorage
s := createTestAgentDB(t, nil, opts)
app := s.AppenderV2(context.TODO())
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls := labelsForTest(t.Name(), numSeries)
for _, l := range lbls {
lset := labels.New(l...)
for range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{})
require.NoError(t, err)
for i := range numDatapoints {
sample := chunks.GenerateSamples(0, 1)
_, err := app.Append(0, lset, int64(i), sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
histograms := tsdbutil.GenerateTestHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
}
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries)
for _, l := range lbls {
lset := labels.New(l...)
floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms)
for i := range numHistograms {
_, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
for i := range numHistograms {
_, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{})
require.NoError(t, err)
}
}
}
// Do a rollback, which should clear uncommitted data. A followup call to
// commit should persist nothing to the WAL.
require.NoError(t, app.Rollback())
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
// Do a rollback, which should clear uncommitted data. A followup call to
// commit should persist nothing to the WAL.
require.NoError(t, app.Rollback())
require.NoError(t, app.Commit())
require.NoError(t, s.Close())
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
sr, err := wlog.NewSegmentsReader(s.wal.Dir())
require.NoError(t, err)
defer func() {
require.NoError(t, sr.Close())
}()
// Read records from WAL and check for expected count of series and samples.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
// Read records from WAL and check for expected count of series and samples.
var (
r = wlog.NewReader(sr)
dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
walSeriesCount, walSamplesCount, walHistogramCount, walFloatHistogramCount, walExemplarsCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
walSeriesCount int
)
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
var series []record.RefSeries
series, err = dec.Series(rec, series)
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
walSamplesCount += len(samples)
case record.Samples, record.SamplesV2:
t.Errorf("should not have found samples")
case record.Exemplars:
var exemplars []record.RefExemplar
exemplars, err = dec.Exemplars(rec, exemplars)
require.NoError(t, err)
walExemplarsCount += len(exemplars)
case record.Exemplars:
t.Errorf("should not have found exemplars")
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
var histograms []record.RefHistogramSample
histograms, err = dec.HistogramSamples(rec, histograms)
require.NoError(t, err)
walHistogramCount += len(histograms)
case record.HistogramSamples, record.CustomBucketsHistogramSamples, record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
t.Errorf("should not have found histograms")
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
var floatHistograms []record.RefFloatHistogramSample
floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms)
require.NoError(t, err)
walFloatHistogramCount += len(floatHistograms)
default:
default:
}
}
}
// Check that only series get stored after calling Rollback.
require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL")
require.Equal(t, 0, walSamplesCount, "samples should not have been written to WAL")
require.Equal(t, 0, walExemplarsCount, "exemplars should not have been written to WAL")
require.Equal(t, 0, walHistogramCount, "histograms should not have been written to WAL")
require.Equal(t, 0, walFloatHistogramCount, "float histograms should not have been written to WAL")
// Check that only series get stored after calling Rollback.
require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL")
}
}
func TestFullTruncateWAL_AppendV2(t *testing.T) {

View File

@ -226,7 +226,7 @@ func TestCommit(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -362,7 +362,7 @@ func TestRollback(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -1425,7 +1425,7 @@ func readWALSamples(t *testing.T, walDir string) []walSample {
series, err := dec.Series(rec, nil)
require.NoError(t, err)
lastSeries = series[0]
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err = dec.Samples(rec, samples[:0])
require.NoError(t, err)
for _, s := range samples {

View File

@ -0,0 +1,343 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"errors"
"fmt"
"io"
"math"
"math/rand"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/timestamp"
)
// sampleCase is a named, pre-generated series of (st, t, v) samples shared by
// the chunk encoding benchmarks so every format sees identical input.
type sampleCase struct {
	name    string
	samples []triple
}
// fmtCase describes one chunk encoding under benchmark.
type fmtCase struct {
	name       string
	newChunkFn func() Chunk
	// stUnsupported marks formats that cannot store start timestamps;
	// iterated STs are expected to decode as 0 for such formats.
	stUnsupported bool
}
// foreachFmtSampleCase invokes fn once per (chunk format, sample pattern)
// combination. Sample patterns are the cross product of timestamp, value and
// start-timestamp (ST) generators, all driven by a fixed-seed RNG so every
// format is benchmarked against identical, deterministic data.
func foreachFmtSampleCase(b *testing.B, fn func(b *testing.B, f fmtCase, s sampleCase)) {
	const nSamples = 120 // Same as tsdb.DefaultSamplesPerChunk.

	d, err := time.Parse(time.DateTime, "2025-11-04 10:01:05")
	require.NoError(b, err)
	var (
		r       = rand.New(rand.NewSource(1)) // Fixed seed for reproducibility.
		initST  = timestamp.FromTime(d)       // Use realistic timestamp.
		initT   = initST + 15000              // 15s after initST.
		initV   = 1243535.123
		rInts   = make([]int64, 2*nSamples) // Random ints for timestamps and STs.
		rFloats = make([]float64, nSamples)
	)
	// Pre-generate random numbers so that adding/removing cases does not change
	// the generated samples.
	for i := range nSamples {
		rInts[i] = int64(r.Intn(100))
		rInts[nSamples+i] = int64(r.Intn(100))
		rFloats[i] = float64(r.Intn(100))
	}

	// tPatterns control how the regular timestamp advances.
	type tPattern struct {
		name string
		next func(t int64, i int) int64
	}
	// vPatterns control how the value advances.
	type vPattern struct {
		name string
		next func(v float64, i int) float64
	}
	// stPatterns compute the start timestamp from the previous t (before the
	// step), the new t (after the step), and the sample index.
	type stPattern struct {
		name    string
		compute func(prevT, newT int64, i int) int64
	}

	tPatterns := []tPattern{
		{
			name: "t=constant",
			next: func(t int64, _ int) int64 { return t + 15000 },
		},
		{
			// 15 seconds ± up to 100ms of jitter.
			name: "t=jitter",
			next: func(t int64, i int) int64 { return t + rInts[i] - 50 + 15000 },
		},
		{
			// First 10 samples at constant 60s, then one 10-interval gap (600s),
			// then 60s ± 30ms jitter. The gap triggers XOR18111 full mode via
			// multiplier encoding (dod=540000 = 9×60000). Subsequent small-jitter
			// delta-of-deltas (≤30ms) use XOR18111's 7-bit full-mode code (9 bits
			// total) vs XOR compact's minimum 14-bit code (16 bits total).
			name: "t=gap-jitter",
			next: func(t int64, i int) int64 {
				if i < 10 {
					return t + 60000
				}
				if i == 10 {
					return t + 10*60000 // 10-interval gap; triggers XOR18111 full mode.
				}
				return t + 60000 + rInts[i]%61 - 30 // 60s ± 30ms jitter.
			},
		},
	}
	vPatterns := []vPattern{
		{
			name: "v=constant",
			next: func(v float64, _ int) float64 { return v },
		},
		// We are not interested in float compression we're not changing it.
		// {
		// 	// Varying from -50 to +50 in 100 discrete steps.
		// 	name: "v=rand-steps",
		// 	next: func(v float64, i int) float64 { return v + rFloats[i] - 50 },
		// },
		// {
		// 	// Random increment between 0 and 1.0.
		// 	name: "v=rand0-1",
		// 	next: func(v float64, i int) float64 { return v + rFloats[i]/100.0 },
		// },
		// {
		// 	// Random decrement between 0 and -1.0. Tests negative varint encoding;
		// 	// see https://victoriametrics.com/blog/go-protobuf/.
		// 	name: "v=nrand0-1",
		// 	next: func(v float64, i int) float64 { return v - rFloats[i]/100.0 },
		// },
	}
	stPatterns := []stPattern{
		{
			name:    "st=0",
			compute: func(_, _ int64, _ int) int64 { return 0 },
		},
		{
			// Constant ST throughout the chunk, typical for long-running counters.
			name:    "st=cumulative",
			compute: func(_, _ int64, _ int) int64 { return initST },
		},
		{
			// ST is just after the previous sample's t: tight delta interval.
			name:    "st=delta-excl",
			compute: func(prevT, _ int64, _ int) int64 { return prevT + 1 },
		},
		{
			// ST equals the previous sample's t: inclusive delta interval.
			name:    "st=delta-incl",
			compute: func(prevT, _ int64, _ int) int64 { return prevT },
		},
		{
			// ST equals the current sample's t.
			name:    "st=t",
			compute: func(_, newT int64, _ int) int64 { return newT },
		},
		{
			// ST is equal to the previous t plus up to 100ms of jitter.
			name:    "st=delta-jitter",
			compute: func(prevT, _ int64, i int) int64 { return prevT + rInts[nSamples+i] },
		},
		{
			// Cumulative ST with periodic resets 10s before the current t.
			name: "st=cum-resets",
			compute: func(_, newT int64, i int) int64 {
				if i%6 == 5 {
					return newT - 10000
				}
				return initST
			},
		},
		{
			// Cumulative ST with periodic zero resets.
			name: "st=cum-zeros",
			compute: func(_, _ int64, i int) int64 {
				if i%6 == 5 {
					return 0
				}
				return initST
			},
		},
	}

	// Materialize the full cross product of patterns into concrete samples.
	var sampleCases []sampleCase
	for _, tp := range tPatterns {
		for _, vp := range vPatterns {
			for _, sp := range stPatterns {
				samples := make([]triple, 0, nSamples)
				t, v := initT, initV
				for i := range nSamples {
					prevT := t
					t = tp.next(t, i)
					v = vp.next(v, i)
					st := sp.compute(prevT, t, i)
					samples = append(samples, triple{st: st, t: t, v: v})
				}
				sampleCases = append(sampleCases, sampleCase{
					name:    tp.name + "/" + vp.name + "/" + sp.name,
					samples: samples,
				})
			}
		}
	}

	// Run fn as a sub-benchmark for every (format, sample case) pair.
	for _, f := range []fmtCase{
		{name: "XOR", newChunkFn: func() Chunk { return NewXORChunk() }, stUnsupported: true},
		{name: "XOR2", newChunkFn: func() Chunk { return NewXOR2Chunk() }},
	} {
		for _, s := range sampleCases {
			b.Run(fmt.Sprintf("fmt=%s/%s", f.name, s.name), func(b *testing.B) {
				fn(b, f, s)
			})
		}
	}
}
/*
export bench=bw.bench/append.v2 && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/appendprof && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
*/
// BenchmarkAppender measures encoding speed, allocations, and the resulting
// encoded chunk size (reported as "B/chunk") for every chunk format and
// sample pattern produced by foreachFmtSampleCase.
func BenchmarkAppender(b *testing.B) {
	foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
		b.ReportAllocs()
		for b.Loop() {
			chunk := f.newChunkFn()
			app, err := chunk.Appender()
			if err != nil {
				b.Fatalf("get appender: %s", err)
			}
			for i := range s.samples {
				smpl := s.samples[i]
				app.Append(smpl.st, smpl.t, smpl.v)
			}
			// NOTE: Some buffered implementations only encode on Bytes().
			b.ReportMetric(float64(len(chunk.Bytes())), "B/chunk")
			require.Equal(b, len(s.samples), chunk.NumSamples())
		}
	})
}
/*
export bench=bw.bench/iter && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-memprofile=${bench}.mem.pprof \
| tee ${bench}.txt
*/
// BenchmarkIterator measures decoding speed per chunk format and sample
// pattern. Before the timed loop it round-trips the samples once to verify
// that encode/decode is lossless for the format under test.
func BenchmarkIterator(b *testing.B) {
	foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
		floatEquals := func(a, b float64) bool {
			return a == b
		}
		if f.name == "ALPBuffered" {
			// Hack as ALP loses precision.
			// NOTE(review): no "ALPBuffered" fmtCase is registered in
			// foreachFmtSampleCase, so this branch is currently dead code —
			// presumably kept for local experiments; confirm.
			floatEquals = func(a, b float64) bool {
				return math.Abs(a-b) < 1e-6
			}
		}
		b.ReportAllocs()

		// Build the chunk that the timed loop will decode.
		c := f.newChunkFn()
		a, err := c.Appender()
		if err != nil {
			b.Fatalf("get appender: %s", err)
		}
		for _, p := range s.samples {
			a.Append(p.st, p.t, p.v)
		}
		// Some chunk implementations might be buffered. Reset to ensure we don't reuse
		// appending buffers.
		c.Reset(c.Bytes())

		// While we are at it, test if encoding/decoding works.
		it := c.Iterator(nil)
		require.Equal(b, len(s.samples), c.NumSamples())
		var got []triple
		for i := 0; it.Next() == ValFloat; i++ {
			t, v := it.At()
			got = append(got, triple{st: it.AtST(), t: t, v: v})
		}
		if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
			require.NoError(b, err)
		}
		expectedSamples := s.samples
		if f.stUnsupported {
			// If the format does not support ST, zero them out for comparison.
			expectedSamples = make([]triple, len(s.samples))
			copy(expectedSamples, s.samples)
			for i := range s.samples {
				expectedSamples[i].st = 0
			}
		}
		if diff := cmp.Diff(expectedSamples, got, cmp.AllowUnexported(triple{}), cmp.Comparer(floatEquals)); diff != "" {
			b.Fatalf("mismatch (-want +got):\n%s", diff)
		}

		var sink float64
		// Measure decoding efficiency.
		for i := 0; b.Loop(); {
			// Some chunk implementations might be buffered. Reset to ensure we don't reuse
			// previous decoded data.
			c.Reset(c.Bytes())
			b.ReportMetric(float64(len(c.Bytes())), "B/chunk")
			it := c.Iterator(it)
			for it.Next() == ValFloat {
				_, v := it.At()
				sink = v
				i++
			}
			if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
				require.NoError(b, err)
			}
			_ = sink
		}
	})
}

View File

@ -101,6 +101,7 @@ func (b *bstream) writeByte(byt byte) {
// writeBits writes the nbits right-most bits of u to the stream
// in left-to-right order.
// TODO: Once XOR2 stabilizes, replace writeBits with the writeBitsFast implementation and remove writeBitsFast.
func (b *bstream) writeBits(u uint64, nbits int) {
u <<= 64 - uint(nbits)
for nbits >= 8 {
@ -117,6 +118,40 @@ func (b *bstream) writeBits(u uint64, nbits int) {
}
}
// writeBitsFast is like writeBits but handles the partial last byte inline to
// avoid per-byte writeByte calls, and writes complete bytes directly to the
// stream slice.
func (b *bstream) writeBitsFast(u uint64, nbits int) {
	// Left-align the payload so the nbits to write are the most significant
	// bits of u.
	u <<= 64 - uint(nbits)

	// If the last byte is partial, fill its remaining bits first.
	// NOTE(review): this treats b.count as the number of free (writable) bits
	// in the last stream byte — confirm against the bstream field docs.
	if b.count > 0 {
		free := int(b.count)
		last := len(b.stream) - 1
		b.stream[last] |= byte(u >> uint(64-free))
		if nbits < free {
			// Everything fit into the partial byte and free bits remain.
			b.count = uint8(free - nbits)
			return
		}
		// The partial byte is now full; drop the bits we just wrote.
		u <<= uint(free)
		nbits -= free
		b.count = 0
	}

	// Write complete bytes directly, avoiding per-byte function call overhead.
	for nbits >= 8 {
		b.stream = append(b.stream, byte(u>>56))
		u <<= 8
		nbits -= 8
	}

	// Write any remaining bits as a partial final byte.
	if nbits > 0 {
		b.stream = append(b.stream, byte(u>>56))
		b.count = uint8(8 - nbits)
	}
}
type bstreamReader struct {
stream []byte
streamOffset int // The offset from which read the next byte from the stream.
@ -215,6 +250,156 @@ func (b *bstreamReader) ReadByte() (byte, error) {
return byte(v), nil
}
// readXOR2ControlFast is the inlinable fast path of readXOR2Control. It
// reports false when fewer than 4 valid bits are buffered, or when the
// control prefix is '1111' (cases 4 and 5); the caller then retries with
// readXOR2Control. Keep this function small and leaf-only so the compiler
// can inline it.
func (b *bstreamReader) readXOR2ControlFast() (uint8, bool) {
	if b.valid < 4 {
		return 0, false
	}
	prefix := uint8((b.buffer >> (b.valid - 4)) & 0xf)
	switch {
	case prefix&0x8 == 0: // '0xxx': dod=0, val=0 (case 0).
		b.valid--
		return 0, true
	case prefix&0x4 == 0: // '10xx': dod=0, val changed (case 1).
		b.valid -= 2
		return 1, true
	case prefix&0x2 == 0: // '110x': small dod (case 2).
		b.valid -= 3
		return 2, true
	case prefix&0x1 == 0: // '1110': medium dod (case 3).
		b.valid -= 4
		return 3, true
	}
	// '1111' needs a fifth bit: defer to the slow path.
	return 0, false
}
// readXOR2Control reads the XOR2 variable-length joint control prefix
// and returns 0-5 mapping to the six encoding cases:
//
//	0 → '0'     dod=0, val=0           (1 bit consumed)
//	1 → '10'    dod=0, val≠0           (2 bits consumed)
//	2 → '110'   dod≠0, 13-bit signed dod (3 bits consumed)
//	3 → '1110'  dod≠0, 20-bit signed dod (4 bits consumed)
//	4 → '11110' dod≠0, 64-bit escape   (5 bits consumed)
//	5 → '11111' dod=0, stale NaN       (5 bits consumed)
//
// The fast path peeks at 4 bits from the internal buffer; for the '1111'
// prefix a fifth bit is read to distinguish cases 4 and 5.
func (b *bstreamReader) readXOR2Control() (uint8, error) {
	if b.valid >= 4 {
		// Fast path: all four prefix bits are buffered, so classify with one
		// peek and subtract only the bits the matched prefix consumes.
		top4 := uint8((b.buffer >> (b.valid - 4)) & 0xf)
		if top4 < 8 { // '0xxx' → case 0.
			b.valid--
			return 0, nil
		}
		if top4 < 12 { // '10xx' → case 1.
			b.valid -= 2
			return 1, nil
		}
		if top4 < 14 { // '110x' → case 2.
			b.valid -= 3
			return 2, nil
		}
		if top4 == 14 { // '1110' → case 3.
			b.valid -= 4
			return 3, nil
		}
		// '1111': need fifth bit to distinguish cases 4 and 5.
		if b.valid >= 5 {
			bit4 := uint8((b.buffer >> (b.valid - 5)) & 1)
			b.valid -= 5
			return 4 + bit4, nil
		}
		// Fifth bit spans a buffer boundary; consume the four known bits
		// and read the fifth from the stream.
		b.valid -= 4
		bit4, err := b.readBit()
		if err != nil {
			return 0, err
		}
		if bit4 == zero {
			return 4, nil
		}
		return 5, nil
	}

	// Slow path: bits may span buffer boundaries, read one at a time.
	bit0, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit0 == zero {
		return 0, nil
	}
	bit1, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit1 == zero {
		return 1, nil
	}
	bit2, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit2 == zero {
		return 2, nil
	}
	bit3, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit3 == zero {
		return 3, nil
	}
	bit4, err := b.readBit()
	if err != nil {
		return 0, err
	}
	if bit4 == zero {
		return 4, nil
	}
	return 5, nil
}
// readUvarint decodes a varint-encoded uint64 using direct method calls,
// avoiding the io.ByteReader interface dispatch used by binary.ReadUvarint,
// which causes the receiver to escape to the heap. At most
// binary.MaxVarintLen64 bytes are consumed; if the varint is still not
// terminated by then, io.ErrUnexpectedEOF is returned.
func (b *bstreamReader) readUvarint() (uint64, error) {
	var result uint64
	for shift := uint(0); shift < uint(binary.MaxVarintLen64)*7; shift += 7 {
		c, err := b.ReadByte()
		if err != nil {
			return result, err
		}
		if c&0x80 == 0 {
			// Continuation bit clear: this is the final byte.
			return result | uint64(c)<<shift, nil
		}
		result |= uint64(c&0x7f) << shift
	}
	return result, io.ErrUnexpectedEOF
}
// readVarint decodes a varint-encoded int64 using direct method calls,
// avoiding the io.ByteReader interface dispatch used by binary.ReadVarint,
// which causes the receiver to escape to the heap.
func (b *bstreamReader) readVarint() (int64, error) {
	u, err := b.readUvarint()
	// Branchless zigzag decode: even u → u/2, odd u → ^(u/2).
	return int64(u>>1) ^ -int64(u&1), err
}
// loadNextBuffer loads the next bytes from the stream into the internal buffer.
// The input nbits is the minimum number of bits that must be read, but the implementation
// can read more (if possible) to improve performances.

View File

@ -14,6 +14,7 @@
package chunkenc
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
@ -32,6 +33,44 @@ func TestBstream_Reset(t *testing.T) {
}, bs)
}
// BenchmarkWriteBits benchmarks writeBits for various bit widths.
func BenchmarkWriteBits(b *testing.B) {
	for _, nbits := range []int{1, 8, 17, 32, 52, 64} {
		b.Run(fmt.Sprintf("nbits=%d", nbits), func(b *testing.B) {
			b.ReportAllocs()
			// Allocate the backing buffer once; each iteration reuses it.
			bs := bstream{stream: make([]byte, 0, 1024)}
			for range b.N {
				bs.stream = bs.stream[:0]
				bs.count = 0
				for j := range 100 {
					bs.writeBits(uint64(j), nbits)
				}
			}
		})
	}
}
// BenchmarkWriteBitsFast benchmarks writeBitsFast for various bit widths.
func BenchmarkWriteBitsFast(b *testing.B) {
	for _, nbits := range []int{1, 8, 17, 32, 52, 64} {
		b.Run(fmt.Sprintf("nbits=%d", nbits), func(b *testing.B) {
			b.ReportAllocs()
			// Allocate the backing buffer once; each iteration reuses it.
			bs := bstream{stream: make([]byte, 0, 1024)}
			for range b.N {
				bs.stream = bs.stream[:0]
				bs.count = 0
				for j := range 100 {
					bs.writeBitsFast(uint64(j), nbits)
				}
			}
		})
	}
}
func TestBstreamReader(t *testing.T) {
// Write to the bit stream.
w := bstream{}

View File

@ -30,6 +30,7 @@ const (
EncXOR
EncHistogram
EncFloatHistogram
EncXOR2
)
func (e Encoding) String() string {
@ -42,13 +43,15 @@ func (e Encoding) String() string {
return "histogram"
case EncFloatHistogram:
return "floathistogram"
case EncXOR2:
return "XOR2"
}
return "<unknown>"
}
// IsValidEncoding returns true for supported encodings.
func IsValidEncoding(e Encoding) bool {
return e == EncXOR || e == EncHistogram || e == EncFloatHistogram
return e == EncXOR || e == EncHistogram || e == EncFloatHistogram || e == EncXOR2
}
const (
@ -73,6 +76,8 @@ type Chunk interface {
Bytes() []byte
// Encoding returns the encoding type of the chunk.
// If the chunk is capable of storing ST (start timestamps), it should
// return the appropriate encoding type (e.g., EncXOR2).
Encoding() Encoding
// Appender returns an appender to append samples to the chunk.
@ -186,9 +191,12 @@ func (v ValueType) String() string {
}
}
func (v ValueType) ChunkEncoding() Encoding {
func (v ValueType) ChunkEncoding(useXOR2 bool) Encoding {
switch v {
case ValFloat:
if useXOR2 {
return EncXOR2
}
return EncXOR
case ValHistogram:
return EncHistogram
@ -199,17 +207,9 @@ func (v ValueType) ChunkEncoding() Encoding {
}
}
func (v ValueType) NewChunk() (Chunk, error) {
switch v {
case ValFloat:
return NewXORChunk(), nil
case ValHistogram:
return NewHistogramChunk(), nil
case ValFloatHistogram:
return NewFloatHistogramChunk(), nil
default:
return nil, fmt.Errorf("value type %v unsupported", v)
}
// NewChunk returns a new empty chunk for the given value type.
func (v ValueType) NewChunk(useXOR2 bool) (Chunk, error) {
return NewEmptyChunk(v.ChunkEncoding(useXOR2))
}
// MockSeriesIterator returns an iterator for a mock series with custom
@ -299,6 +299,7 @@ type pool struct {
xor sync.Pool
histogram sync.Pool
floatHistogram sync.Pool
xo2 sync.Pool
}
// NewPool returns a new pool.
@ -319,6 +320,11 @@ func NewPool() Pool {
return &FloatHistogramChunk{b: bstream{}}
},
},
xo2: sync.Pool{
New: func() any {
return &XOR2Chunk{b: bstream{}}
},
},
}
}
@ -331,6 +337,8 @@ func (p *pool) Get(e Encoding, b []byte) (Chunk, error) {
c = p.histogram.Get().(*HistogramChunk)
case EncFloatHistogram:
c = p.floatHistogram.Get().(*FloatHistogramChunk)
case EncXOR2:
c = p.xo2.Get().(*XOR2Chunk)
default:
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}
@ -352,6 +360,9 @@ func (p *pool) Put(c Chunk) error {
case EncFloatHistogram:
_, ok = c.(*FloatHistogramChunk)
sp = &p.floatHistogram
case EncXOR2:
_, ok = c.(*XOR2Chunk)
sp = &p.xo2
default:
return fmt.Errorf("invalid chunk encoding %q", c.Encoding())
}
@ -378,6 +389,8 @@ func FromData(e Encoding, d []byte) (Chunk, error) {
return &HistogramChunk{b: bstream{count: 0, stream: d}}, nil
case EncFloatHistogram:
return &FloatHistogramChunk{b: bstream{count: 0, stream: d}}, nil
case EncXOR2:
return &XOR2Chunk{b: bstream{count: 0, stream: d}}, nil
}
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}
@ -391,6 +404,8 @@ func NewEmptyChunk(e Encoding) (Chunk, error) {
return NewHistogramChunk(), nil
case EncFloatHistogram:
return NewFloatHistogramChunk(), nil
case EncXOR2:
return NewXOR2Chunk(), nil
}
return nil, fmt.Errorf("invalid chunk encoding %q", e)
}

View File

@ -16,36 +16,41 @@ package chunkenc
import (
"errors"
"fmt"
"io"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
)
type pair struct {
t int64
v float64
type triple struct {
st, t int64
v float64
}
func TestChunk(t *testing.T) {
for enc, nc := range map[Encoding]func() Chunk{
EncXOR: func() Chunk { return NewXORChunk() },
} {
t.Run(fmt.Sprintf("%v", enc), func(t *testing.T) {
testcases := []struct {
encoding Encoding
supportsST bool
factory func() Chunk
}{
{encoding: EncXOR, supportsST: false, factory: func() Chunk { return NewXORChunk() }},
{encoding: EncXOR2, supportsST: true, factory: func() Chunk { return NewXOR2Chunk() }},
}
for _, tc := range testcases {
t.Run(fmt.Sprintf("%v", tc.encoding), func(t *testing.T) {
for range make([]struct{}, 1) {
c := nc()
testChunk(t, c)
c := tc.factory()
testChunk(t, c, tc.supportsST)
}
})
}
}
func testChunk(t *testing.T, c Chunk) {
func testChunk(t *testing.T, c Chunk, supportsST bool) {
app, err := c.Appender()
require.NoError(t, err)
var exp []pair
var exp []triple
var (
ts = int64(1234123324)
v = 1243535.123
@ -65,26 +70,30 @@ func testChunk(t *testing.T, c Chunk) {
require.NoError(t, err)
}
app.Append(0, ts, v)
exp = append(exp, pair{t: ts, v: v})
app.Append(ts-100, ts, v)
expST := int64(0)
if supportsST {
expST = ts - 100
}
exp = append(exp, triple{st: expST, t: ts, v: v})
}
// 1. Expand iterator in simple case.
it1 := c.Iterator(nil)
var res1 []pair
var res1 []triple
for it1.Next() == ValFloat {
ts, v := it1.At()
res1 = append(res1, pair{t: ts, v: v})
res1 = append(res1, triple{st: it1.AtST(), t: ts, v: v})
}
require.NoError(t, it1.Err())
require.Equal(t, exp, res1)
// 2. Expand second iterator while reusing first one.
it2 := c.Iterator(it1)
var res2 []pair
var res2 []triple
for it2.Next() == ValFloat {
ts, v := it2.At()
res2 = append(res2, pair{t: ts, v: v})
res2 = append(res2, triple{st: it2.AtST(), t: ts, v: v})
}
require.NoError(t, it2.Err())
require.Equal(t, exp, res2)
@ -93,18 +102,22 @@ func testChunk(t *testing.T, c Chunk) {
mid := len(exp) / 2
it3 := c.Iterator(nil)
var res3 []pair
var res3 []triple
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
// Below ones should not matter.
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
require.Equal(t, ValFloat, it3.Seek(exp[mid].t))
ts, v = it3.At()
res3 = append(res3, pair{t: ts, v: v})
res3 = append(res3, triple{st: it3.AtST(), t: ts, v: v})
lastTs := ts
for it3.Next() == ValFloat {
ts, v := it3.At()
res3 = append(res3, pair{t: ts, v: v})
lastTs = ts
res3 = append(res3, triple{st: it3.AtST(), t: ts, v: v})
}
// Seeking to last timestamp should work and it is a no-op.
require.Equal(t, ValFloat, it3.Seek(lastTs))
require.NoError(t, it3.Err())
require.Equal(t, exp[mid:], res3)
require.Equal(t, ValNone, it3.Seek(exp[len(exp)-1].t+1))
@ -129,6 +142,10 @@ func TestPool(t *testing.T) {
name: "float histogram",
encoding: EncFloatHistogram,
},
{
name: "xor opt st",
encoding: EncXOR2,
},
{
name: "invalid encoding",
encoding: EncNone,
@ -150,6 +167,8 @@ func TestPool(t *testing.T) {
b = &c.(*HistogramChunk).b
case EncFloatHistogram:
b = &c.(*FloatHistogramChunk).b
case EncXOR2:
b = &c.(*XOR2Chunk).b
default:
b = &c.(*XORChunk).b
}
@ -199,111 +218,3 @@ func (c fakeChunk) Encoding() Encoding {
func (c fakeChunk) Reset([]byte) {
c.t.Fatal("Reset should not be called")
}
func benchmarkIterator(b *testing.B, newChunk func() Chunk) {
const samplesPerChunk = 250
var (
t = int64(1234123324)
v = 1243535.123
exp []pair
)
for range samplesPerChunk {
// t += int64(rand.Intn(10000) + 1)
t += int64(1000)
// v = rand.Float64()
v += float64(100)
exp = append(exp, pair{t: t, v: v})
}
chunk := newChunk()
{
a, err := chunk.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
j := 0
for _, p := range exp {
if j > 250 {
break
}
a.Append(0, p.t, p.v)
j++
}
}
b.ReportAllocs()
var res float64
var it Iterator
for i := 0; b.Loop(); {
it := chunk.Iterator(it)
for it.Next() == ValFloat {
_, v := it.At()
res = v
i++
}
if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
require.NoError(b, err)
}
_ = res
}
}
func newXORChunk() Chunk {
return NewXORChunk()
}
func BenchmarkXORIterator(b *testing.B) {
benchmarkIterator(b, newXORChunk)
}
func BenchmarkXORAppender(b *testing.B) {
r := rand.New(rand.NewSource(1))
b.Run("constant", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return 1000, 0
}, newXORChunk)
})
b.Run("random steps", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return int64(r.Intn(100) - 50 + 15000), // 15 seconds +- up to 100ms of jitter.
float64(r.Intn(100) - 50) // Varying from -50 to +50 in 100 discrete steps.
}, newXORChunk)
})
b.Run("random 0-1", func(b *testing.B) {
benchmarkAppender(b, func() (int64, float64) {
return int64(r.Intn(100) - 50 + 15000), // 15 seconds +- up to 100ms of jitter.
r.Float64() // Random between 0 and 1.0.
}, newXORChunk)
})
}
func benchmarkAppender(b *testing.B, deltas func() (int64, float64), newChunk func() Chunk) {
var (
t = int64(1234123324)
v = 1243535.123
)
const nSamples = 120 // Same as tsdb.DefaultSamplesPerChunk.
var exp []pair
for range nSamples {
dt, dv := deltas()
t += dt
v += dv
exp = append(exp, pair{t: t, v: v})
}
b.ReportAllocs()
for b.Loop() {
c := newChunk()
a, err := c.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
for _, p := range exp {
a.Append(0, p.t, p.v)
}
}
}

View File

@ -0,0 +1,156 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram"
)
// testChunkSTHandling tests handling of start times in chunks.
// It uses 0-4 samples with timestamp 1000,2000,3000,4000 and monotonically
// increasing start times that are chosen from 0-(ts-500) for each sample.
// All combinations of start times are tested for each number of samples.
func testChunkSTHandling(t *testing.T, vt ValueType, chunkFactory func() Chunk) {
	// sampleAppend appends one sample of the given value type, mapping v onto
	// the sample value (and, for histograms, onto Sum/Count).
	sampleAppend := func(app Appender, vt ValueType, st, ts int64, v float64) {
		switch vt {
		case ValFloat:
			app.Append(st, ts, v)
		case ValHistogram:
			_, recoded, _, err := app.AppendHistogram(nil, st, ts, &histogram.Histogram{Sum: v, Count: uint64(v * 10)}, false)
			require.NoError(t, err)
			require.False(t, recoded)
		case ValFloatHistogram:
			_, recoded, _, err := app.AppendFloatHistogram(nil, st, ts, &histogram.FloatHistogram{Sum: v, Count: v * 10}, false)
			require.NoError(t, err)
			require.False(t, recoded)
		default:
			t.Fatalf("unsupported value type %v", vt)
		}
	}
	// get reads the current sample back from the iterator as (st, t, value).
	get := func(it Iterator, vt ValueType) (int64, int64, float64) {
		switch vt {
		case ValFloat:
			ts, v := it.At()
			return it.AtST(), ts, v
		case ValHistogram:
			ts, h := it.AtHistogram(nil)
			return it.AtST(), ts, float64(h.Sum)
		case ValFloatHistogram:
			ts, fh := it.AtFloatHistogram(nil)
			return it.AtST(), ts, fh.Sum
		default:
			t.Fatalf("unsupported value type %v", vt)
			return 0, 0, 0
		}
	}

	// runCase appends samples to a fresh chunk and checks that both the
	// original chunk and one resumed from a byte clone (the appender-recovery
	// path) iterate back exactly the appended (st, t, v) values.
	runCase := func(t *testing.T, samples []triple) {
		chunk := chunkFactory()
		app, err := chunk.Appender()
		require.NoError(t, err)

		var clone []byte
		for i, s := range samples {
			if i == len(samples)-1 {
				// Snapshot the bytes just before the last append so the clone
				// can redo that append through a recovered appender.
				clone = append(clone, chunk.Bytes()...)
			}
			sampleAppend(app, vt, s.st, s.t, s.v)
		}

		chunksToTest := []Chunk{chunk}
		if len(samples) > 0 {
			// If there are samples, also test that appending to a chunk cloned from the original chunk works correctly.
			// This tests resuming the appender from a previous chunk.
			cloneChunk := chunkFactory()
			cloneChunk.Reset(clone)
			cloneApp, err := cloneChunk.Appender()
			require.NoError(t, err)
			sampleAppend(cloneApp, vt, samples[len(samples)-1].st, samples[len(samples)-1].t, samples[len(samples)-1].v)
			chunksToTest = append(chunksToTest, cloneChunk)
		}
		printChunkName := func(i int) string {
			if i == 0 {
				return "original"
			}
			return "cloned"
		}

		for ci, chk := range chunksToTest {
			require.Equal(t, len(samples), chk.NumSamples(), "%s chunk: number of samples mismatch", printChunkName(ci))
			it := chk.Iterator(nil)
			for i, s := range samples {
				require.Equal(t, vt, it.Next(), "%s[%d]: value type mismatch", printChunkName(ci), i)
				st, ts, f := get(it, vt)
				require.Equal(t, s.t, ts, "%s[%d]: timestamp mismatch", printChunkName(ci), i)
				require.Equal(t, s.st, st, "%s[%d]: start time mismatch", printChunkName(ci), i)
				require.InDelta(t, s.v, f, 1e-9, "%s[%d]: value mismatch", printChunkName(ci), i)
			}
			require.Equal(t, ValNone, it.Next())
			require.NoError(t, it.Err())
		}
	}

	t.Run("manual for debugging", func(t *testing.T) {
		samples := []triple{
			{st: 0, t: 1000, v: 1.5},
			{st: 0, t: 2000, v: 2.5},
			{st: 0, t: 3000, v: 3.5},
			{st: 0, t: 4000, v: 4.5},
		}
		runCase(t, samples)
	})

	stTimes := []int64{0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000}
	ts := func(j int) int64 {
		return int64(1000 * (j + 1))
	}
	for numberOfSamples := range 5 {
		samples := make([]triple, numberOfSamples)
		// sampleSTidx is an odometer over stTimes indices, one digit per sample.
		sampleSTidx := make([]int, numberOfSamples)
		for {
			for j := range numberOfSamples {
				samples[j] = triple{
					st: stTimes[sampleSTidx[j]],
					t:  ts(j),
					v:  float64(j) + 0.5,
				}
			}
			t.Run(fmt.Sprintf("%v", samples), func(t *testing.T) {
				runCase(t, samples)
			})
			// Advance the odometer: bump the right-most digit whose ST is
			// still below its sample's timestamp; reset the digits after it.
			exhausted := true
			for j := numberOfSamples - 1; j >= 0; j-- {
				if stTimes[sampleSTidx[j]] < ts(j) {
					sampleSTidx[j]++
					exhausted = false
					break
				}
				sampleSTidx[j] = 0
			}
			if exhausted {
				break
			}
		}
	}
}

889
tsdb/chunkenc/xor2.go Normal file
View File

@ -0,0 +1,889 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// XOR2Chunk implements XOR encoding with joint timestamp+value control bits
// and byte-packed dod encoding for efficient appending. It also has an extra
// header byte after the sample count to allow for optionally encoding start
// timestamp (ST).
//
// Control prefix for samples >= 2:
//
// 0 → dod=0 AND value unchanged (1 bit)
// 10 → dod=0, value changed (2 bits, then value encoding)
// 110 → dod≠0, 13-bit signed [-4096, 4095] (prefix+dod packed into 2 bytes)
// 1110 → dod≠0, 20-bit signed [-524288, 524287] (prefix+dod packed into 3 bytes)
// 11110 → dod≠0, 64-bit escape (5+64 bits, then value encoding)
// 11111 → dod=0, stale NaN (5 bits, no value field)
//
// The dod bins are widened so that prefix+dod aligns to byte boundaries,
// replacing writeBit calls with writeByte for common cases.
//
// Value encoding for the dod≠0 cases (`<varbit_xor2>`):
//
// 0 → value unchanged
// 10 → reuse previous leading/trailing window
// 110 → new leading/trailing window
// 111 → stale NaN
//
// Value encoding for the dod=0, value-changed case (`<varbit_xor2_nn>`):
//
// 0 → reuse previous leading/trailing window
// 1 → new leading/trailing window
//
// Start timestamp (ST) encoding:
//
// 1-byte ST header (at b[chunkHeaderSize]) layout:
//
// bit 7 (0x80): firstSTKnown — ST for the first sample is present in the stream
// bits 6-0: firstSTChangeOn — sample index where the first ST change begins
//
// When no ST is provided (st == 0 always), the header stays 0x00 and the
// chunk has no additional bits in it.
//
// When ST is present, the ST delta (prevT - st) is appended after each
// sample's joint timestamp+value encoding using putVarbitInt.
package chunkenc
import (
"encoding/binary"
"math"
"math/bits"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/value"
)
const (
	// chunkSTHeaderSize is the size in bytes of the ST header that follows
	// the 2-byte sample-count header in an XOR2 chunk.
	chunkSTHeaderSize = 1
	// maxFirstSTChangeOn is the largest sample index representable in the
	// 7 low bits of the ST header (firstSTChangeOn).
	maxFirstSTChangeOn = 0x7F
)
// writeHeaderFirstSTKnown sets the ST header byte to the firstSTKnown flag
// (bit 7), clearing the firstSTChangeOn bits in the process.
func writeHeaderFirstSTKnown(b []byte) {
	b[0] = 1 << 7
}
// writeHeaderFirstSTChangeOn records, in the 7 low bits of the ST header
// byte, the sample index at which the first ST change occurs. Bit 7 stays
// reserved for the firstSTKnown flag and is left untouched.
func writeHeaderFirstSTChangeOn(b []byte, firstSTChangeOn uint16) {
	if firstSTChangeOn > maxFirstSTChangeOn {
		// Out of the 7-bit range. This should never happen; writing it would
		// corrupt the header (ST already skipped but shouldn't), so drop it.
		return
	}
	b[0] |= byte(firstSTChangeOn)
}
// readSTHeader decodes the 1-byte ST header: bit 7 is the firstSTKnown flag
// and the 7 low bits are the firstSTChangeOn sample index. The general mask
// expression also covers the 0x00 and 0x80 special cases, so no separate
// branches are needed.
func readSTHeader(b []byte) (firstSTKnown bool, firstSTChangeOn uint8) {
	hdr := b[0]
	return hdr&0x80 != 0, hdr & 0x7F
}
// XOR2Chunk holds XOR2 encoded samples with optional start
// timestamp per chunk or per sample.
type XOR2Chunk struct {
	b bstream // Backing bit stream; starts with the chunk header plus the 1-byte ST header.
}
// NewXOR2Chunk returns a new, empty chunk with XOR2 encoding. The stream is
// pre-sized with room for the chunk header and the ST header byte.
func NewXOR2Chunk() *XOR2Chunk {
	return &XOR2Chunk{b: bstream{
		stream: make([]byte, chunkHeaderSize+chunkSTHeaderSize, chunkAllocationSize),
	}}
}
// Reset resets the chunk to operate on the given stream.
func (c *XOR2Chunk) Reset(stream []byte) {
	c.b.Reset(stream)
}
// Encoding returns the encoding type, always EncXOR2.
func (*XOR2Chunk) Encoding() Encoding {
	return EncXOR2
}
// Bytes returns the underlying byte slice of the chunk.
func (c *XOR2Chunk) Bytes() []byte {
	return c.b.bytes()
}
// NumSamples returns the number of samples in the chunk, stored as a
// 2-byte big-endian counter at the start of the chunk header.
func (c *XOR2Chunk) NumSamples() int {
	return int(binary.BigEndian.Uint16(c.b.bytes()))
}
// Compact implements the Chunk interface. It reallocates the stream to drop
// excess capacity once the slack exceeds the compaction threshold.
func (c *XOR2Chunk) Compact() {
	l := len(c.b.stream)
	if cap(c.b.stream) <= l+chunkCompactCapacityThreshold {
		return
	}
	c.b.stream = append(make([]byte, 0, l), c.b.stream...)
}
// Appender implements the Chunk interface. For a non-empty chunk it replays
// every existing sample with an iterator in order to reconstruct the
// appender state (previous timestamp/value, deltas, leading/trailing
// window, and ST bookkeeping).
func (c *XOR2Chunk) Appender() (Appender, error) {
	if len(c.b.stream) == chunkHeaderSize+chunkSTHeaderSize {
		// Empty chunk. leading=0xff marks "no XOR window established yet".
		return &xor2Appender{
			b:       &c.b,
			t:       math.MinInt64,
			leading: 0xff,
		}, nil
	}
	it := c.iterator(nil)
	for it.Next() != ValNone {
	}
	if err := it.Err(); err != nil {
		return nil, err
	}
	// Set the bit position for continuing writes. The iterator's reader tracks
	// how many bits remain unread in the last byte.
	c.b.count = it.br.valid
	a := &xor2Appender{
		b:               &c.b,
		st:              it.st,
		t:               it.t,
		v:               it.baselineV,
		tDelta:          it.tDelta,
		stDiff:          it.stDiff,
		leading:         it.leading,
		trailing:        it.trailing,
		numTotal:        binary.BigEndian.Uint16(c.b.bytes()),
		firstSTKnown:    it.firstSTKnown,
		firstSTChangeOn: uint16(it.firstSTChangeOn),
	}
	return a, nil
}
// iterator returns an xor2Iterator over the chunk, reusing it when it is
// already an xor2Iterator and allocating a fresh one otherwise.
func (c *XOR2Chunk) iterator(it Iterator) *xor2Iterator {
	iter, ok := it.(*xor2Iterator)
	if !ok {
		iter = &xor2Iterator{}
	}
	iter.Reset(c.b.bytes())
	return iter
}
// Iterator implements the Chunk interface, reusing it when possible.
func (c *XOR2Chunk) Iterator(it Iterator) Iterator {
	return c.iterator(it)
}
// xor2Appender appends samples with optional start timestamps using
// the XOR2 joint control bit encoding for regular timestamp and value,
// and putVarbitInt for the start timestamp delta.
type xor2Appender struct {
	b *bstream

	st              int64   // ST of the previously appended sample.
	t               int64   // Timestamp of the previously appended sample.
	v               float64 // Last non-stale value (XOR baseline).
	tDelta          uint64  // Timestamp delta of the previous sample.
	stDiff          int64   // prevT - st for the previous sample.
	leading         uint8   // Leading zeros of the current XOR window; 0xff = no window yet.
	trailing        uint8   // Trailing zeros of the current XOR window.
	numTotal        uint16  // Number of samples appended so far.
	firstSTChangeOn uint16  // Sample index of the first ST change; 0 = none recorded.
	firstSTKnown    bool    // Whether sample 0 carried a non-zero ST.
}
// Append adds the sample (st, t, v) to the chunk and updates the running
// appender state plus the 2-byte sample counter in the chunk header.
//
// Sample 0 is stored as varint(t) + raw 64-bit value, sample 1 as
// uvarint(tDelta) + XOR value delta, and samples >= 2 use the joint XOR2
// control encoding (see encodeJoint). ST data, when present, is appended
// after the sample's timestamp/value bits.
func (a *xor2Appender) Append(st, t int64, v float64) {
	var (
		tDelta uint64
		stDiff int64
	)
	switch a.numTotal {
	case 0:
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutVarint(buf, t)] {
			a.b.writeByte(b)
		}
		a.b.writeBitsFast(math.Float64bits(v), 64)
		if st != 0 {
			// First sample has a known ST: store varint(t-st) and flag it
			// in the ST header byte.
			for _, b := range buf[:binary.PutVarint(buf, t-st)] {
				a.b.writeByte(b)
			}
			a.firstSTKnown = true
			writeHeaderFirstSTKnown(a.b.bytes()[chunkHeaderSize:])
		}
	case 1:
		tDelta = uint64(t - a.t)
		buf := make([]byte, binary.MaxVarintLen64)
		for _, b := range buf[:binary.PutUvarint(buf, tDelta)] {
			a.b.writeByte(b)
		}
		a.writeVDelta(v)
		if st != a.st {
			// ST changed already on sample 1: record the absolute diff
			// prevT - st and note the change index in the header.
			stDiff = a.t - st
			a.firstSTChangeOn = 1
			writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], 1)
			putVarbitInt(a.b, stDiff)
		}
	default:
		tDelta = uint64(t - a.t)
		dod := int64(tDelta - a.tDelta)
		// Fast path: no ST involvement at all. The numTotal check drops to
		// the slow path just before the 7-bit header index would overflow.
		if st == 0 && a.numTotal != maxFirstSTChangeOn && a.firstSTChangeOn == 0 && !a.firstSTKnown {
			a.encodeJoint(dod, v)
			a.t = t
			if !value.IsStaleNaN(v) {
				// Stale markers never replace the XOR baseline value.
				a.v = v
			}
			a.tDelta = tDelta
			a.numTotal++
			binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
			return
		}
		// Slow path: ST may be involved.
		a.encodeJoint(dod, v)
		if a.firstSTChangeOn == 0 {
			if st != a.st || a.numTotal == maxFirstSTChangeOn {
				// First ST change: record prevT - st.
				stDiff = a.t - st
				a.firstSTChangeOn = a.numTotal
				writeHeaderFirstSTChangeOn(a.b.bytes()[chunkHeaderSize:], a.numTotal)
				putVarbitInt(a.b, stDiff)
			}
		} else {
			// Later samples store the delta against the previous stDiff.
			stDiff = a.t - st
			putVarbitInt(a.b, stDiff-a.stDiff)
		}
	}
	a.st = st
	a.t = t
	if !value.IsStaleNaN(v) {
		a.v = v
	}
	a.tDelta = tDelta
	a.stDiff = stDiff
	a.numTotal++
	binary.BigEndian.PutUint16(a.b.bytes(), a.numTotal)
}
// encodeJoint writes the XOR2 joint timestamp+value control sequence for
// samples >= 2. The dod bins are byte-aligned where possible so the common
// cases use whole-byte writes instead of bit-by-bit writes.
func (a *xor2Appender) encodeJoint(dod int64, v float64) {
	if dod == 0 {
		if value.IsStaleNaN(v) {
			// `11111`: dod=0 stale NaN; no value field follows.
			a.b.writeBitsFast(0b11111, 5)
			return
		}
		vbits := math.Float64bits(v) ^ math.Float64bits(a.v)
		if vbits == 0 {
			// `0`: dod=0 and value unchanged — a single bit in total.
			a.b.writeBit(zero)
			return
		}
		// `10`: dod=0, value changed; vbits is known non-zero here.
		a.b.writeBitsFast(0b10, 2)
		a.writeVDeltaKnownNonZero(vbits)
		return
	}
	switch {
	case dod >= -(1<<12) && dod <= (1<<12)-1:
		// 13-bit dod: prefix `110` packed with top 5 bits → 2 bytes total.
		a.b.writeByte(0b110_00000 | byte(uint64(dod)>>8)&0x1F)
		a.b.writeByte(byte(uint64(dod)))
	case dod >= -(1<<19) && dod <= (1<<19)-1:
		// 20-bit dod: prefix `1110` packed with top 4 bits → 3 bytes total.
		a.b.writeByte(0b1110_0000 | byte(uint64(dod)>>16)&0x0F)
		a.b.writeByte(byte(uint64(dod) >> 8))
		a.b.writeByte(byte(uint64(dod)))
	default:
		// 64-bit escape (rare): `11110`.
		a.b.writeBitsFast(0b11110, 5)
		a.b.writeBitsFast(uint64(dod), 64)
	}
	a.writeVDelta(v)
}
// writeVDelta encodes the value delta for the dod≠0 case:
//
//	`0`   → value unchanged
//	`10`  → reuse previous leading/trailing window
//	`110` → new leading/trailing window
//	`111` → stale NaN
func (a *xor2Appender) writeVDelta(v float64) {
	if value.IsStaleNaN(v) {
		a.b.writeBitsFast(0b111, 3)
		return
	}
	delta := math.Float64bits(v) ^ math.Float64bits(a.v)
	if delta == 0 {
		a.b.writeBit(zero)
		return
	}
	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))
	// Clamp to 31 so the count fits the 5-bit leading field.
	if newLeading >= 32 {
		newLeading = 31
	}
	// Reuse the previous window when the delta fits inside it; 0xff leading
	// means no window has been established yet.
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBitsFast(0b10, 2)
		a.b.writeBitsFast(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}
	a.leading, a.trailing = newLeading, newTrailing
	a.b.writeBitsFast(0b110, 3)
	a.b.writeBitsFast(uint64(newLeading), 5)
	// sigbits=64 wraps to 0 in the 6-bit field; the reader maps 0 back to 64.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBitsFast(uint64(sigbits), 6)
	a.b.writeBitsFast(delta>>newTrailing, int(sigbits))
}
// writeVDeltaKnownNonZero encodes a precomputed value XOR delta for the
// dod=0, value-changed case. delta must be non-zero or staleNaN. Stale NaN with dod=0 is
// handled at the joint control level (`11111`) and never reaches this function.
//
// Encoding:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
func (a *xor2Appender) writeVDeltaKnownNonZero(delta uint64) {
	newLeading := uint8(bits.LeadingZeros64(delta))
	newTrailing := uint8(bits.TrailingZeros64(delta))
	// Clamp to 31 so the count fits the 5-bit leading field.
	if newLeading >= 32 {
		newLeading = 31
	}
	// Reuse the previous window when the delta fits inside it; 0xff leading
	// means no window has been established yet.
	if a.leading != 0xff && newLeading >= a.leading && newTrailing >= a.trailing {
		a.b.writeBit(zero)
		a.b.writeBitsFast(delta>>a.trailing, 64-int(a.leading)-int(a.trailing))
		return
	}
	a.leading, a.trailing = newLeading, newTrailing
	a.b.writeBit(one)
	a.b.writeBitsFast(uint64(newLeading), 5)
	// sigbits=64 wraps to 0 in the 6-bit field; the reader maps 0 back to 64.
	sigbits := 64 - newLeading - newTrailing
	a.b.writeBitsFast(uint64(sigbits), 6)
	a.b.writeBitsFast(delta>>newTrailing, int(sigbits))
}
// AppendHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Appender) AppendHistogram(*HistogramAppender, int64, int64, *histogram.Histogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a histogram sample to a float chunk")
}
// AppendFloatHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Appender) AppendFloatHistogram(*FloatHistogramAppender, int64, int64, *histogram.FloatHistogram, bool) (Chunk, bool, Appender, error) {
	panic("appended a float histogram sample to a float chunk")
}
// xor2Iterator decodes XOR2 chunks.
type xor2Iterator struct {
	br              bstreamReader
	numTotal        uint16 // Total samples in the chunk (from the header).
	numRead         uint16 // Samples decoded so far.
	firstSTKnown    bool   // Sample 0 carries an ST (from the ST header).
	firstSTChangeOn uint8  // Sample index of the first ST change; 0 = none.
	leading         uint8  // Leading zeros of the current XOR window.
	trailing        uint8  // Trailing zeros of the current XOR window.
	st              int64  // ST of the current sample.
	t               int64  // Timestamp of the current sample.
	val             float64
	tDelta          uint64
	stDiff          int64 // Accumulated prevT - st.
	err             error
	baselineV       float64 // Last non-stale value for XOR baseline.
}
// Seek advances the iterator until the current timestamp is at least t,
// decoding at least one sample. It returns ValNone once the chunk is
// exhausted or a decode error occurred.
func (it *xor2Iterator) Seek(t int64) ValueType {
	if it.err != nil {
		return ValNone
	}
	for it.numRead == 0 || it.t < t {
		if it.Next() == ValNone {
			return ValNone
		}
	}
	return ValFloat
}
// At returns the current sample's timestamp and value.
func (it *xor2Iterator) At() (int64, float64) {
	return it.t, it.val
}
// AtHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Iterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
	panic("cannot call xor2Iterator.AtHistogram")
}
// AtFloatHistogram panics: XOR2 chunks hold float samples only.
func (*xor2Iterator) AtFloatHistogram(*histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	panic("cannot call xor2Iterator.AtFloatHistogram")
}
// AtT returns the current sample's timestamp.
func (it *xor2Iterator) AtT() int64 {
	return it.t
}
// AtST returns the current sample's start timestamp (0 when unknown).
func (it *xor2Iterator) AtST() int64 {
	return it.st
}
// Err returns the first decoding error encountered, if any.
func (it *xor2Iterator) Err() error {
	return it.err
}
// Reset re-initializes the iterator to decode the chunk bytes b from the
// beginning, clearing all previously accumulated decoding state.
func (it *xor2Iterator) Reset(b []byte) {
	*it = xor2Iterator{
		br:       newBReader(b[chunkHeaderSize+chunkSTHeaderSize:]),
		numTotal: binary.BigEndian.Uint16(b),
	}
	it.firstSTKnown, it.firstSTChangeOn = readSTHeader(b[chunkHeaderSize:])
}
// Next advances the iterator to the next sample. Sample 0 and 1 use their
// dedicated encodings (varint / uvarint); samples >= 2 decode the joint
// XOR2 control sequence, followed by optional ST data.
func (it *xor2Iterator) Next() ValueType {
	if it.err != nil || it.numRead == it.numTotal {
		return ValNone
	}
	if it.numRead == 0 {
		// Sample 0: varint timestamp + raw 64-bit value.
		t, err := it.br.readVarint()
		if err != nil {
			it.err = err
			return ValNone
		}
		v, err := it.br.readBits(64)
		if err != nil {
			it.err = err
			return ValNone
		}
		it.t = t
		it.val = math.Float64frombits(v)
		if !value.IsStaleNaN(it.val) {
			it.baselineV = it.val
		}
		// Optional ST for sample 0.
		if it.firstSTKnown {
			stDiff, err := it.br.readVarint()
			if err != nil {
				it.err = err
				return ValNone
			}
			it.st = t - stDiff
		}
		it.numRead++
		return ValFloat
	}
	if it.numRead == 1 {
		// Sample 1: uvarint timestamp delta + XOR value delta.
		tDelta, err := it.br.readUvarint()
		if err != nil {
			it.err = err
			return ValNone
		}
		prevT := it.t
		it.tDelta = tDelta
		it.t += int64(it.tDelta)
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
		// Optional ST delta for sample 1.
		if it.firstSTChangeOn == 1 {
			sdod, err := readVarbitInt(&it.br)
			if err != nil {
				it.err = err
				return ValNone
			}
			it.stDiff = sdod
			it.st = prevT - sdod
		}
		it.numRead++
		return ValFloat
	}
	// Sample N >= 2: read joint XOR2 control, then optional ST data.
	prevT := it.t
	savedNumRead := it.numRead
	ctrl, ok := it.br.readXOR2ControlFast()
	if !ok {
		var err error
		ctrl, err = it.br.readXOR2Control()
		if err != nil {
			it.err = err
			return ValNone
		}
	}
	switch ctrl {
	case 0:
		// dod=0, value unchanged.
		it.t += int64(it.tDelta)
		it.val = it.baselineV
	case 1:
		// dod=0, value changed.
		it.t += int64(it.tDelta)
		if err := it.decodeValueKnownNonZero(); err != nil {
			it.err = err
			return ValNone
		}
	case 2:
		// 13-bit dod.
		if err := it.readDod(13); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 3:
		// 20-bit dod.
		if err := it.readDod(20); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	case 4:
		// 64-bit escape.
		if err := it.readDod(64); err != nil {
			it.err = err
			return ValNone
		}
		if err := it.decodeValue(); err != nil {
			it.err = err
			return ValNone
		}
	default:
		// dod=0, stale NaN.
		it.t += int64(it.tDelta)
		it.val = math.Float64frombits(value.StaleNaN)
	}
	// Optional ST data, appended after the joint timestamp+value encoding.
	// The ST delta was encoded as (prevT - st), using the PREVIOUS sample's t.
	if it.firstSTChangeOn > 0 && savedNumRead >= uint16(it.firstSTChangeOn) {
		sdod, err := readVarbitInt(&it.br)
		if err != nil {
			it.err = err
			return ValNone
		}
		if savedNumRead == uint16(it.firstSTChangeOn) {
			// The first recorded ST value is absolute, not a delta.
			it.stDiff = sdod
		} else {
			it.stDiff += sdod
		}
		it.st = prevT - it.stDiff
	}
	it.numRead++
	return ValFloat
}
// readDod reads a signed dod of width w bits and updates it.tDelta and it.t.
func (it *xor2Iterator) readDod(w uint8) error {
	var b uint64
	// Fast path: the bits are already buffered in the 64-bit read buffer.
	if it.br.valid >= w {
		it.br.valid -= w
		b = (it.br.buffer >> it.br.valid) & ((uint64(1) << w) - 1)
	} else {
		var err error
		b, err = it.br.readBits(w)
		if err != nil {
			return err
		}
	}
	// Sign-extend values whose top bit is set (two's complement in w bits).
	if w < 64 && b >= (1<<(w-1)) {
		b -= 1 << w
	}
	it.tDelta = uint64(int64(it.tDelta) + int64(b))
	it.t += int64(it.tDelta)
	return nil
}
// decodeValue reads the XOR2 value encoding for the dod≠0 case:
//
//	`0`   → value unchanged
//	`10`  → reuse previous leading/trailing window
//	`110` → new leading/trailing window
//	`111` → stale NaN
func (it *xor2Iterator) decodeValue() error {
	// Fast path: 3 bits available — read the full control prefix in one shot.
	// Encoding: `0`=unchanged, `10`=reuse window, `110`=new window, `111`=stale NaN.
	if it.br.valid >= 3 {
		ctrl := (it.br.buffer >> (it.br.valid - 3)) & 0x7
		if ctrl&0x4 == 0 {
			// `0xx`: value unchanged, consume 1 bit.
			it.br.valid--
			it.val = it.baselineV
			return nil
		}
		if ctrl&0x6 == 0x4 {
			// `10x`: reuse previous leading/trailing window, consume 2 bits.
			it.br.valid -= 2
			sz := uint8(64 - int(it.leading) - int(it.trailing))
			var valueBits uint64
			if it.br.valid >= sz {
				it.br.valid -= sz
				valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
			} else {
				var err error
				valueBits, err = it.br.readBits(sz)
				if err != nil {
					return err
				}
			}
			vbits := math.Float64bits(it.baselineV)
			vbits ^= valueBits << it.trailing
			it.val = math.Float64frombits(vbits)
			it.baselineV = it.val
			return nil
		}
		// `11x`: consume 3 bits.
		it.br.valid -= 3
		if ctrl == 0x6 {
			// `110`: new leading/trailing window.
			return it.decodeNewLeadingTrailing()
		}
		// `111`: stale NaN.
		it.val = math.Float64frombits(value.StaleNaN)
		return nil
	}
	// Slow path: fewer than 3 bits buffered (rare, only near buffer refills).
	// Read the control prefix one bit at a time.
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `0` → value unchanged.
		it.val = it.baselineV
		return nil
	}
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `10` → reuse previous leading/trailing window.
		sz := uint8(64 - int(it.leading) - int(it.trailing))
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `110` → new leading/trailing window.
		return it.decodeNewLeadingTrailing()
	}
	// `111` → stale NaN.
	it.val = math.Float64frombits(value.StaleNaN)
	return nil
}
// decodeValueKnownNonZero reads the XOR2 value encoding for the dod=0,
// value-changed case:
//
//	`0` → reuse previous leading/trailing window
//	`1` → new leading/trailing window
func (it *xor2Iterator) decodeValueKnownNonZero() error {
	// sz is the width of the current XOR window's significant bits.
	sz := uint8(64 - int(it.leading) - int(it.trailing))
	// Fast path: combine the 1-bit reuse/new-window control read with the
	// sz-bit value read into a single buffer operation.
	if it.br.valid >= 1+sz {
		ctrlBit := (it.br.buffer >> (it.br.valid - 1)) & 1
		if ctrlBit == 0 { // `0`: reuse previous leading/trailing window.
			it.br.valid -= 1 + sz
			valueBits := (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
			vbits := math.Float64bits(it.baselineV)
			vbits ^= valueBits << it.trailing
			it.val = math.Float64frombits(vbits)
			it.baselineV = it.val
			return nil
		}
		// `1`: new leading/trailing window.
		it.br.valid--
		return it.decodeNewLeadingTrailing()
	}
	// Slow path: read control bit then value bits separately.
	var bit bit
	if it.br.valid > 0 {
		it.br.valid--
		bit = (it.br.buffer & (uint64(1) << it.br.valid)) != 0
	} else {
		var err error
		bit, err = it.br.readBit()
		if err != nil {
			return err
		}
	}
	if bit == zero {
		// `0` → reuse previous leading/trailing window.
		var valueBits uint64
		if it.br.valid >= sz {
			it.br.valid -= sz
			valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << sz) - 1)
		} else {
			var err error
			valueBits, err = it.br.readBits(sz)
			if err != nil {
				return err
			}
		}
		vbits := math.Float64bits(it.baselineV)
		vbits ^= valueBits << it.trailing
		it.val = math.Float64frombits(vbits)
		it.baselineV = it.val
		return nil
	}
	// `1` → new leading/trailing window.
	return it.decodeNewLeadingTrailing()
}
// decodeNewLeadingTrailing reads a new leading/sigbits/value triple and
// updates it.leading, it.trailing, it.val, and it.baselineV.
func (it *xor2Iterator) decodeNewLeadingTrailing() error {
	var newLeading, sigbits uint64
	// Fast path: read leading (5 bits) and sigbits (6 bits) together as 11 bits.
	if it.br.valid >= 11 {
		val := (it.br.buffer >> (it.br.valid - 11)) & 0x7ff
		it.br.valid -= 11
		newLeading = val >> 6
		sigbits = val & 0x3f
	} else {
		var err error
		newLeading, err = it.br.readBits(5)
		if err != nil {
			return err
		}
		sigbits, err = it.br.readBits(6)
		if err != nil {
			return err
		}
	}
	it.leading = uint8(newLeading)
	// The encoder stores a full 64-bit window as sigbits 0; map it back.
	if sigbits == 0 {
		sigbits = 64
	}
	it.trailing = 64 - it.leading - uint8(sigbits)
	n := uint8(sigbits)
	var valueBits uint64
	if it.br.valid >= n {
		it.br.valid -= n
		valueBits = (it.br.buffer >> it.br.valid) & ((uint64(1) << n) - 1)
	} else {
		var err error
		valueBits, err = it.br.readBits(n)
		if err != nil {
			return err
		}
	}
	vbits := math.Float64bits(it.baselineV)
	vbits ^= valueBits << it.trailing
	it.val = math.Float64frombits(vbits)
	it.baselineV = it.val
	return nil
}

527
tsdb/chunkenc/xor2_test.go Normal file
View File

@ -0,0 +1,527 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"fmt"
"math"
"math/bits"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/value"
)
// newXOR2IteratorForPayload builds an xor2Iterator whose bit reader is
// positioned at the start of the bits written by payload, after skipping
// `padding` zero bits. setup, when non-nil, pre-seeds iterator state such
// as the XOR baseline or the leading/trailing window.
func newXOR2IteratorForPayload(t *testing.T, padding int, payload func(*bstream), setup func(*xor2Iterator)) *xor2Iterator {
	t.Helper()
	var bs bstream
	if padding > 0 {
		bs.writeBitsFast(0, padding)
	}
	payload(&bs)
	// Add tail bytes so the reader initially fills a full 64-bit buffer.
	bs.writeBitsFast(0, 64)
	it := &xor2Iterator{}
	if setup != nil {
		setup(it)
	}
	it.br = newBReader(bs.bytes())
	if padding > 0 {
		_, err := it.br.readBits(uint8(padding))
		require.NoError(t, err)
	}
	return it
}
// writeXOR2NewWindowPayload writes the new-window triple (5-bit leading,
// 6-bit sigbits, then the significant value bits) for delta, mirroring the
// appender. A full 64-bit window is encoded with a sigbits field of 0.
func writeXOR2NewWindowPayload(bs *bstream, delta uint64) (leading, trailing uint8) {
	leading, trailing, sigbits := xor2DeltaWindow(delta)
	encodedSigbits := sigbits
	if sigbits == 64 {
		encodedSigbits = 0
	}
	bs.writeBitsFast(uint64(leading), 5)
	bs.writeBitsFast(uint64(encodedSigbits), 6)
	bs.writeBitsFast(delta>>trailing, int(sigbits))
	return leading, trailing
}
// xor2DeltaWindow computes the leading/trailing zero window for an XOR
// delta, clamping leading to 31 exactly like the appender does so it fits
// the 5-bit leading field.
func xor2DeltaWindow(delta uint64) (leading, trailing, sigbits uint8) {
	leading = uint8(bits.LeadingZeros64(delta))
	if leading > 31 {
		leading = 31
	}
	trailing = uint8(bits.TrailingZeros64(delta))
	sigbits = 64 - leading - trailing
	return leading, trailing, sigbits
}
// BenchmarkXor2Write measures appending 120 regularly spaced float samples
// (no ST) into a fresh XOR2 chunk per iteration.
func BenchmarkXor2Write(b *testing.B) {
	samples := make([]struct {
		t int64
		v float64
	}, 120)
	for i := range samples {
		samples[i].t = int64(i) * 1000
		samples[i].v = float64(i) + float64(i)/10 + float64(i)/100 + float64(i)/1000
	}
	b.ReportAllocs()
	for b.Loop() {
		c := NewXOR2Chunk()
		app, _ := c.Appender()
		for _, s := range samples {
			app.Append(0, s.t, s.v)
		}
	}
}
// BenchmarkXor2Read measures iterating a 120-sample XOR2 chunk (no ST),
// reusing the iterator across iterations.
func BenchmarkXor2Read(b *testing.B) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(b, err)
	for i := int64(0); i < 120*1000; i += 1000 {
		app.Append(0, i, float64(i)+float64(i)/10+float64(i)/100+float64(i)/1000)
	}
	b.ReportAllocs()
	var it Iterator
	for b.Loop() {
		var ts int64
		var v float64
		it = c.Iterator(it)
		for it.Next() != ValNone {
			ts, v = it.At()
		}
		_, _ = ts, v
	}
}
// TestXOR2Basic round-trips a handful of regularly spaced samples without
// ST and verifies they decode unchanged.
func TestXOR2Basic(t *testing.T) {
	chk := NewXOR2Chunk()
	app, err := chk.Appender()
	require.NoError(t, err)
	cases := []struct {
		ts  int64
		val float64
	}{
		{1000, 1.0},
		{2000, 2.0},
		{3000, 3.0},
		{4000, 4.0},
		{5000, 5.0},
	}
	for _, c := range cases {
		app.Append(0, c.ts, c.val)
	}
	it := chk.Iterator(nil)
	for _, want := range cases {
		require.Equal(t, ValFloat, it.Next())
		gotT, gotV := it.At()
		require.Equal(t, want.ts, gotT)
		require.Equal(t, want.val, gotV)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2WithStaleness round-trips samples interleaved with stale-NaN
// markers (dod=0 here), verifying markers survive while regular values
// decode exactly.
func TestXOR2WithStaleness(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	samples := []struct {
		t     int64
		v     float64
		stale bool
	}{
		{1000, 1.0, false},
		{2000, 2.0, false},
		{3000, math.Float64frombits(value.StaleNaN), true},
		{4000, 4.0, false},
		{5000, math.Float64frombits(value.StaleNaN), true},
		{6000, 6.0, false},
	}
	for _, s := range samples {
		app.Append(0, s.t, s.v)
	}
	it := c.Iterator(nil)
	for _, expected := range samples {
		require.Equal(t, ValFloat, it.Next())
		ts, v := it.At()
		require.Equal(t, expected.t, ts)
		if expected.stale {
			require.True(t, value.IsStaleNaN(v), "Expected stale NaN at ts=%d", ts)
		} else {
			require.Equal(t, expected.v, v)
		}
	}
	require.Equal(t, ValNone, it.Next())
}
func TestXOR2StaleWithDodNonZero(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	// Stale NaN samples where the timestamp dod is non-zero, exercising the
	// `111` value encoding path inside writeVDelta.
	samples := []struct {
		t     int64
		v     float64
		stale bool
	}{
		{1000, 1.0, false},
		{2000, 2.0, false},
		// dod = (3050 - 2000) - (2000 - 1000) = 1050 - 1000 = 50: stale with dod≠0.
		{3050, math.Float64frombits(value.StaleNaN), true},
		{4050, 4.0, false},
		{5050, 5.0, false},
	}
	for _, s := range samples {
		app.Append(0, s.t, s.v)
	}
	it := c.Iterator(nil)
	for _, expected := range samples {
		require.Equal(t, ValFloat, it.Next())
		ts, v := it.At()
		require.Equal(t, expected.t, ts)
		if expected.stale {
			require.True(t, value.IsStaleNaN(v), "Expected stale NaN at ts=%d", ts)
		} else {
			require.Equal(t, expected.v, v)
		}
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2IrregularTimestamps round-trips timestamps whose dods fall into
// the 13-bit and 20-bit encoding bins.
func TestXOR2IrregularTimestamps(t *testing.T) {
	c := NewXOR2Chunk()
	app, err := c.Appender()
	require.NoError(t, err)
	// Timestamps with dod values spanning multiple encoding ranges.
	timestamps := []int64{
		1000, 2000, 3000,
		// dod in 13-bit range.
		3050, 4050, 5050,
		// dod in 20-bit range (large jitter).
		5050 + 100000, 5050 + 200000, 5050 + 300000,
		// Back to regular.
		5050 + 301000,
	}
	for _, ts := range timestamps {
		app.Append(0, ts, 1.0)
	}
	it := c.Iterator(nil)
	for _, expected := range timestamps {
		require.Equal(t, ValFloat, it.Next())
		ts, _ := it.At()
		require.Equal(t, expected, ts)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2LargeDod forces the rare 64-bit dod escape encoding: a jump of
// 1<<20 exceeds the 20-bit dod bin.
func TestXOR2LargeDod(t *testing.T) {
	chk := NewXOR2Chunk()
	app, err := chk.Appender()
	require.NoError(t, err)
	timestamps := []int64{0, 1000, 2000, 2000 + (1 << 20)}
	for _, ts := range timestamps {
		app.Append(0, ts, 1.0)
	}
	it := chk.Iterator(nil)
	for _, want := range timestamps {
		require.Equal(t, ValFloat, it.Next())
		got, _ := it.At()
		require.Equal(t, want, got)
	}
	require.Equal(t, ValNone, it.Next())
}
// TestXOR2ChunkST runs the shared ST-handling test suite against XOR2 chunks.
func TestXOR2ChunkST(t *testing.T) {
	testChunkSTHandling(t, ValFloat, func() Chunk {
		return NewXOR2Chunk()
	})
}
// TestXOR2Chunk_MoreThan127Samples appends more samples than the 7-bit
// firstSTChangeOn header field can index, exercising the appender's forced
// switch off the no-ST fast path at index maxFirstSTChangeOn.
func TestXOR2Chunk_MoreThan127Samples(t *testing.T) {
	const afterMax = maxFirstSTChangeOn + 3
	t.Run("zero ST", func(t *testing.T) {
		chunk := NewXOR2Chunk()
		app, err := chunk.Appender()
		require.NoError(t, err)
		for i := range afterMax {
			app.Append(0, int64(i*10+1), float64(i)*1.5)
		}
		it := chunk.Iterator(nil)
		for i := range afterMax {
			require.Equal(t, ValFloat, it.Next())
			st := it.AtST()
			ts, v := it.At()
			require.Equal(t, int64(0), st)
			require.Equal(t, int64(i*10+1), ts)
			require.Equal(t, float64(i)*1.5, v)
		}
		require.Equal(t, ValNone, it.Next())
		require.NoError(t, it.Err())
	})
	t.Run("non-zero ST after 127", func(t *testing.T) {
		chunk := NewXOR2Chunk()
		app, err := chunk.Appender()
		require.NoError(t, err)
		for i := range afterMax {
			// Only the very last sample carries a non-zero ST.
			st := int64(0)
			if i == afterMax-1 {
				st = int64((afterMax - 1) * 10)
			}
			app.Append(st, int64(i*10+1), float64(i)*1.5)
		}
		it := chunk.Iterator(nil)
		for i := range afterMax {
			require.Equal(t, ValFloat, it.Next())
			st := it.AtST()
			ts, v := it.At()
			if i == afterMax-1 {
				require.Equal(t, int64((afterMax-1)*10), st)
			} else {
				require.Equal(t, int64(0), st)
			}
			require.Equal(t, int64(i*10+1), ts)
			require.Equal(t, float64(i)*1.5, v)
		}
		require.Equal(t, ValNone, it.Next())
		require.NoError(t, it.Err())
	})
}
// TestXOR2DecodeFunctionsAcrossPadding exercises decodeValue,
// decodeValueKnownNonZero, and decodeNewLeadingTrailing across all logical
// cases × all 64 bit-buffer alignments (padding 0..63). Padding controls the
// number of bits that precede the payload in the stream, which determines
// how many bits remain in the 64-bit read buffer when the decode function is
// called. This Cartesian product ensures both the fast path (enough bits
// buffered for a single-shot read) and the slow path (bits span a buffer
// refill) are exercised for every case.
func TestXOR2DecodeFunctionsAcrossPadding(t *testing.T) {
	const baseline = 1234.5
	type testCase struct {
		name    string
		payload func(*bstream)
		setup   func(*xor2Iterator)
		assert  func(*testing.T, *xor2Iterator)
	}
	// runCases runs every case under every padding, invoking fn (the decode
	// function under test) on a freshly positioned iterator.
	runCases := func(t *testing.T, cases []testCase, fn func(*xor2Iterator) error) {
		t.Helper()
		for _, tc := range cases {
			t.Run(tc.name, func(t *testing.T) {
				for padding := range 64 {
					t.Run(fmt.Sprintf("padding=%d", padding), func(t *testing.T) {
						it := newXOR2IteratorForPayload(t, padding, tc.payload, tc.setup)
						require.NoError(t, fn(it))
						tc.assert(t, it)
					})
				}
			})
		}
	}
	// decodeValue: `0`=unchanged, `10`=reuse window, `110`=new window, `111`=stale NaN.
	t.Run("decodeValue", func(t *testing.T) {
		reuseD := uint64(0x000ABCDE000000)
		rL, rT, rS := xor2DeltaWindow(reuseD)
		// Two new-window variants: full-width sigbits (encoded as 0) and small
		// sigbits, to cover both value-bits read paths inside decodeNewLeadingTrailing.
		newDFull := uint64(0xFEDCBA9876543211)
		nLFull, nTFull, _ := xor2DeltaWindow(newDFull)
		newDSmall := uint64(0x000ABCDE000000)
		nLSmall, nTSmall, _ := xor2DeltaWindow(newDSmall)
		runCases(t, []testCase{
			{
				name:    "unchanged",
				payload: func(bs *bstream) { bs.writeBit(zero) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, baseline, it.val)
					require.Equal(t, baseline, it.baselineV)
				},
			},
			{
				name: "reuse_window",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b10, 2)
					bs.writeBitsFast(reuseD>>rT, int(rS))
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = rL, rT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ reuseD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, rL, it.leading)
					require.Equal(t, rT, it.trailing)
				},
			},
			{
				name: "new_window_full_sigbits",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b110, 3)
					writeXOR2NewWindowPayload(bs, newDFull)
				},
				setup: func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ newDFull)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, nLFull, it.leading)
					require.Equal(t, nTFull, it.trailing)
				},
			},
			{
				name: "new_window_small_sigbits",
				payload: func(bs *bstream) {
					bs.writeBitsFast(0b110, 3)
					writeXOR2NewWindowPayload(bs, newDSmall)
				},
				setup: func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ newDSmall)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, nLSmall, it.leading)
					require.Equal(t, nTSmall, it.trailing)
				},
			},
			{
				name:    "stale_nan",
				payload: func(bs *bstream) { bs.writeBitsFast(0b111, 3) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.True(t, value.IsStaleNaN(it.val))
					// Stale markers must not overwrite the XOR baseline.
					require.Equal(t, baseline, it.baselineV)
				},
			},
		}, (*xor2Iterator).decodeValue)
	})
	// decodeValueKnownNonZero: `0`=reuse window, `1`=new window.
	// The new_window case uses real leading/trailing (not 0xff) so that sz is
	// small enough for the fast path (valid >= 1+sz) to be reached with ctrlBit=1.
	t.Run("decodeValueKnownNonZero", func(t *testing.T) {
		delta := uint64(0x000ABCDE000000)
		dL, dT, dS := xor2DeltaWindow(delta)
		runCases(t, []testCase{
			{
				name: "reuse_window",
				payload: func(bs *bstream) {
					bs.writeBit(zero)
					bs.writeBitsFast(delta>>dT, int(dS))
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = dL, dT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ delta)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
			{
				name: "new_window",
				payload: func(bs *bstream) {
					bs.writeBit(one)
					writeXOR2NewWindowPayload(bs, delta)
				},
				setup: func(it *xor2Iterator) {
					it.baselineV = baseline
					it.leading, it.trailing = dL, dT
				},
				assert: func(t *testing.T, it *xor2Iterator) {
					expected := math.Float64frombits(math.Float64bits(baseline) ^ delta)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
					require.Equal(t, dL, it.leading)
					require.Equal(t, dT, it.trailing)
				},
			},
		}, (*xor2Iterator).decodeValueKnownNonZero)
	})
	// decodeNewLeadingTrailing: exercises the 11-bit header fast path, the
	// value-bits fast path (small sigbits), and full-width sigbits (encoded as 0).
	t.Run("decodeNewLeadingTrailing", func(t *testing.T) {
		smallD := uint64(0x000ABCDE000000)
		sL, sT, _ := xor2DeltaWindow(smallD)
		fullD := uint64(0xFEDCBA9876543211)
		fL, fT, _ := xor2DeltaWindow(fullD)
		runCases(t, []testCase{
			{
				name:    "small_sigbits",
				payload: func(bs *bstream) { writeXOR2NewWindowPayload(bs, smallD) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, sL, it.leading)
					require.Equal(t, sT, it.trailing)
					expected := math.Float64frombits(math.Float64bits(baseline) ^ smallD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
			{
				name:    "full_width_sigbits",
				payload: func(bs *bstream) { writeXOR2NewWindowPayload(bs, fullD) },
				setup:   func(it *xor2Iterator) { it.baselineV = baseline },
				assert: func(t *testing.T, it *xor2Iterator) {
					require.Equal(t, fL, it.leading)
					require.Equal(t, fT, it.trailing)
					expected := math.Float64frombits(math.Float64bits(baseline) ^ fullD)
					require.Equal(t, expected, it.val)
					require.Equal(t, expected, it.baselineV)
				},
			},
		}, (*xor2Iterator).decodeNewLeadingTrailing)
	})
}

View File

@ -135,7 +135,9 @@ type Meta struct {
}
// ChunkFromSamples requires all samples to have the same type.
// TODO(krajorama): test with ST when chunk formats support it.
// It is not efficient and meant for testing purposes only.
// It scans the samples to determine whether any sample has ST set and
// creates a chunk accordingly.
func ChunkFromSamples(s []Sample) (Meta, error) {
return ChunkFromSamplesGeneric(SampleSlice(s))
}
@ -154,7 +156,17 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) {
}
sampleType := s.Get(0).Type()
c, err := chunkenc.NewEmptyChunk(sampleType.ChunkEncoding())
hasST := false
for i := range s.Len() {
if s.Get(i).ST() != 0 {
hasST = true
break
}
}
// Request storing ST in the chunk if available.
c, err := sampleType.NewChunk(hasST)
if err != nil {
return Meta{}, err
}

View File

@ -19,6 +19,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
)
@ -60,3 +61,35 @@ func TestWriterWithDefaultSegmentSize(t *testing.T) {
require.NoError(t, err)
require.Len(t, d, 1, "expected only one segment to be created to hold both chunks")
}
// TestChunkFromSamplesWithST verifies that ChunkFromSamples preserves
// per-sample start timestamps (ST) through a full encode/decode round trip.
func TestChunkFromSamplesWithST(t *testing.T) {
	// Create samples with explicit ST (start timestamp) values.
	samples := []Sample{
		sample{t: 10, f: 11, st: 5},
		sample{t: 20, f: 12, st: 15},
		sample{t: 30, f: 13, st: 25},
	}
	chk, err := ChunkFromSamples(samples)
	require.NoError(t, err)
	require.NotNil(t, chk.Chunk)
	// Verify MinTime and MaxTime.
	require.Equal(t, int64(10), chk.MinTime)
	require.Equal(t, int64(30), chk.MaxTime)
	// Iterate over the chunk and verify ST values are preserved.
	it := chk.Chunk.Iterator(nil)
	idx := 0
	for vt := it.Next(); vt != chunkenc.ValNone; vt = it.Next() {
		require.Equal(t, chunkenc.ValFloat, vt)
		ts, v := it.At()
		st := it.AtST()
		require.Equal(t, samples[idx].ST(), st, "ST mismatch at index %d", idx)
		require.Equal(t, samples[idx].T(), ts, "T mismatch at index %d", idx)
		require.Equal(t, samples[idx].F(), v, "F mismatch at index %d", idx)
		idx++
	}
	require.NoError(t, it.Err())
	require.Equal(t, len(samples), idx, "expected all samples to be iterated")
}

View File

@ -0,0 +1,130 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compression
import (
"errors"
"fmt"
"github.com/golang/snappy"
"github.com/klauspost/compress/zstd"
)
// Type represents the compression type used for encoding and decoding data.
type Type string

const (
	// None represents the no-compression case.
	// None is the default when Type is empty.
	None Type = "none"
	// Snappy represents the snappy block format.
	Snappy Type = "snappy"
	// Zstd represents zstd compression.
	Zstd Type = "zstd"
)
// Encoder provides compression encoding functionality for supported compression
// types. It is agnostic to the content being compressed, operating on byte
// slices of serialized data streams. The encoder maintains internal state for
// Zstd compression and can handle multiple compression types including None,
// Snappy, and Zstd.
type Encoder struct {
	// w is the reusable zstd writer; nil unless the Encoder was created via NewEncoder().
	w *zstd.Encoder
}
// NewEncoder creates a new Encoder. Returns an error if the zstd encoder cannot
// be initialized.
func NewEncoder() (*Encoder, error) {
	w, err := zstd.NewWriter(nil)
	if err != nil {
		return nil, err
	}
	return &Encoder{w: w}, nil
}
// Encode returns the encoded form of src for the given compression type. It also
// returns an indicator of whether the compression was performed. Encode may skip
// compressing for the None type, but also when src is too large, e.g. for the
// snappy block format.
//
// The buf is used as a buffer for the returned encoding, and it must not overlap
// with src. It is valid to pass a nil buf.
func (e *Encoder) Encode(t Type, src, buf []byte) (_ []byte, compressed bool, err error) {
	switch {
	case len(src) == 0, t == "", t == None:
		// Nothing to encode; return the input unchanged.
		return src, false, nil
	case t == Snappy:
		// If MaxEncodedLen is less than 0 the record is too large to be compressed.
		if snappy.MaxEncodedLen(len(src)) < 0 {
			return src, false, nil
		}
		// The snappy library uses `len` to calculate if we need a new buffer.
		// In order to allocate as few buffers as possible make the length
		// equal to the capacity.
		buf = buf[:cap(buf)]
		return snappy.Encode(buf, src), true, nil
	case t == Zstd:
		// Guard against both a nil receiver and a zero-value Encoder that was
		// not created via NewEncoder(); in the latter case e.w is nil and
		// calling EncodeAll on it would panic.
		if e == nil || e.w == nil {
			return nil, false, errors.New("zstd requested but encoder was not initialized with NewEncoder()")
		}
		return e.w.EncodeAll(src, buf[:0]), true, nil
	default:
		return nil, false, fmt.Errorf("unsupported compression type: %s", t)
	}
}
// Decoder provides decompression functionality for supported compression types.
// It is agnostic to the content being decompressed, operating on byte slices of
// serialized data streams. The decoder maintains internal state for Zstd
// decompression and can handle multiple compression types including None,
// Snappy, and Zstd.
type Decoder struct {
	// r is the reusable zstd reader; nil unless the Decoder was created via NewDecoder().
	r *zstd.Decoder
}
// NewDecoder creates a new Decoder.
func NewDecoder() *Decoder {
	// Calling zstd.NewReader with a nil io.Reader and no options cannot return an error.
	r, _ := zstd.NewReader(nil)
	return &Decoder{r: r}
}
// Decode returns the decoded form of src or an error, given the expected
// compression type.
//
// The buf is used as a buffer for the returned decoded entry, and it must not
// overlap with src. It is valid to pass a nil buf.
func (d *Decoder) Decode(t Type, src, buf []byte) (_ []byte, err error) {
	switch {
	case len(src) == 0, t == "", t == None:
		// Nothing to decode; return the input unchanged.
		return src, nil
	case t == Snappy:
		// The snappy library uses `len` to calculate if we need a new buffer.
		// In order to allocate as few buffers as possible make the length
		// equal to the capacity.
		buf = buf[:cap(buf)]
		return snappy.Decode(buf, src)
	case t == Zstd:
		// Guard against both a nil receiver and a zero-value Decoder that was
		// not created via NewDecoder(); in the latter case d.r is nil and
		// calling DecodeAll on it would panic.
		if d == nil || d.r == nil {
			return nil, errors.New("zstd requested but Decoder was not initialized with NewDecoder()")
		}
		return d.r.DecodeAll(src, buf[:0])
	default:
		return nil, fmt.Errorf("unsupported compression type: %s", t)
	}
}

View File

@ -240,6 +240,11 @@ type Options struct {
// is implemented.
EnableSTAsZeroSample bool
// EnableXOR2Encoding enables the XOR2 chunk encoding for float samples.
// XOR2 provides better compression than XOR, especially for stale markers.
// Automatically set to true when EnableSTStorage is true.
EnableXOR2Encoding bool
// EnableSTStorage determines whether TSDB should write a Start Timestamp (ST)
// per sample to WAL.
// TODO(bwplotka): Implement this option as per PROM-60, currently it's noop.
@ -868,6 +873,8 @@ func Open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, st
opts.FeatureRegistry.Set(features.TSDB, "isolation", !opts.IsolationDisabled)
opts.FeatureRegistry.Set(features.TSDB, "use_uncached_io", opts.UseUncachedIO)
opts.FeatureRegistry.Enable(features.TSDB, "native_histograms")
opts.FeatureRegistry.Set(features.TSDB, "st_storage", opts.EnableSTStorage)
opts.FeatureRegistry.Set(features.TSDB, "xor2_encoding", opts.EnableXOR2Encoding)
}
return open(dir, l, r, opts, rngs, stats)
@ -1074,6 +1081,8 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn
headOpts.OutOfOrderCapMax.Store(opts.OutOfOrderCapMax)
headOpts.EnableSharding = opts.EnableSharding
headOpts.EnableSTAsZeroSample = opts.EnableSTAsZeroSample
headOpts.EnableSTStorage.Store(opts.EnableSTStorage)
headOpts.EnableXOR2Encoding.Store(opts.EnableXOR2Encoding)
headOpts.EnableMetadataWALRecords = opts.EnableMetadataWALRecords
if opts.WALReplayConcurrency > 0 {
headOpts.WALReplayConcurrency = opts.WALReplayConcurrency
@ -1277,18 +1286,12 @@ func (db *DB) ApplyConfig(conf *config.Config) error {
// Update retention configuration if provided.
if conf.StorageConfig.TSDBConfig.Retention != nil {
db.retentionMtx.Lock()
if conf.StorageConfig.TSDBConfig.Retention.Time > 0 {
db.opts.RetentionDuration = int64(conf.StorageConfig.TSDBConfig.Retention.Time)
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
}
if conf.StorageConfig.TSDBConfig.Retention.Size > 0 {
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
}
if conf.StorageConfig.TSDBConfig.Retention.Percentage > 0 {
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
}
db.opts.RetentionDuration = int64(time.Duration(conf.StorageConfig.TSDBConfig.Retention.Time) / time.Millisecond)
db.metrics.retentionDuration.Set((time.Duration(db.opts.RetentionDuration) * time.Millisecond).Seconds())
db.opts.MaxBytes = int64(conf.StorageConfig.TSDBConfig.Retention.Size)
db.metrics.maxBytes.Set(float64(db.opts.MaxBytes))
db.opts.MaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
db.metrics.maxPercentage.Set(float64(db.opts.MaxPercentage))
db.retentionMtx.Unlock()
}
} else {

View File

@ -193,7 +193,7 @@ func TestDataNotAvailableAfterRollback_AppendV2(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -968,16 +968,18 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries_AppendV2(t *testing.T) {
// We test both with few and many samples appended after series creation. If samples are < 120 then there's no
// mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL.
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation)
})
for _, enableSTStorage := range []bool{false, true} {
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage = %v", numSamplesAfterSeriesCreation, run, enableSTStorage), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableSTStorage)
})
}
}
}
}
func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) {
func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableSTStorage bool) {
const numSeries = 1000
db := newTestDB(t)
@ -985,7 +987,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSampl
for seriesRef := 1; seriesRef <= numSeries; seriesRef++ {
// Log samples before the series is logged to the WAL.
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: enableSTStorage}
var samples []record.RefSample
for ts := range numSamplesBeforeSeriesCreation {
@ -1176,139 +1178,143 @@ func TestTombstoneCleanResultEmptyBlock_AppendV2(t *testing.T) {
func TestSizeRetention_AppendV2(t *testing.T) {
t.Parallel()
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
// Add some data to the WAL.
headApp := db.Head().AppenderV2(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{})
// Add some data to the WAL.
headApp := db.Head().AppenderV2(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableSTStorage)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().AppenderV2(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
})
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().AppenderV2(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{})
require.NoError(t, err)
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
}
func TestNotMatcherSelectsLabelsUnsetSeries_AppendV2(t *testing.T) {
@ -1499,33 +1505,36 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) {
require.Equal(t, int64(1000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("wal-only", func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
for _, enableSTStorage := range []bool{false, true} {
t.Run("wal-only,stStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
}
t.Run("existing-block", func(t *testing.T) {
dir := t.TempDir()
@ -1537,37 +1546,39 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) {
require.Equal(t, int64(2000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("existing-block-and-wal", func(t *testing.T) {
dir := t.TempDir()
for _, enableSTStorage := range []bool{false, true} {
t.Run("existing-block-and-wal,stStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
db := newTestDB(t, withDir(dir))
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
}
}
func TestNoEmptyBlocks_AppendV2(t *testing.T) {
@ -3265,7 +3276,7 @@ func testOOOWALWriteAppendV2(t *testing.T,
series, err := dec.Series(rec, nil)
require.NoError(t, err)
records = append(records, series)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
records = append(records, samples)
@ -3422,112 +3433,116 @@ func TestMetadataInWAL_AppenderV2(t *testing.T) {
}
func TestMetadataCheckpointingOnlyKeepsLatestEntry_AppendV2(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
hb.opts.EnableMetadataWALRecords = true
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
hb.opts.EnableMetadataWALRecords = true
// Add some series so we can append metadata to them.
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
// Add some series so we can append metadata to them.
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app := hb.AppenderV2(ctx)
ts := int64(0)
_, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1})
require.NoError(t, err)
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
_, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3})
require.NoError(t, err)
_, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4})
require.NoError(t, err)
require.NoError(t, app.Commit())
app := hb.AppenderV2(ctx)
ts := int64(0)
_, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1})
require.NoError(t, err)
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
_, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3})
require.NoError(t, err)
_, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
app = hb.AppenderV2(ctx)
ts++
_, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6})
require.NoError(t, err)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableSTStorage)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
})
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
}
func TestMetadataAssertInMemoryData_AppendV2(t *testing.T) {
@ -7489,6 +7504,65 @@ func TestAbortBlockCompactions_AppendV2(t *testing.T) {
require.Equal(t, 4, compactions, "expected 4 compactions to be completed")
}
// TestCompactHeadWithSTStorage_AppendV2 ensures that when EnableSTStorage is true,
// compacted blocks contain chunks with EncXOR2 encoding for float samples.
func TestCompactHeadWithSTStorage_AppendV2(t *testing.T) {
	t.Parallel()

	// Two-hour block ranges with ST storage and XOR2 encoding enabled.
	dbOpts := &Options{
		RetentionDuration:  int64(time.Hour * 24 * 15 / time.Millisecond),
		NoLockfile:         true,
		MinBlockDuration:   int64(time.Hour * 2 / time.Millisecond),
		MaxBlockDuration:   int64(time.Hour * 2 / time.Millisecond),
		WALCompression:     compression.Snappy,
		EnableSTStorage:    true,
		EnableXOR2Encoding: true,
	}
	db := newTestDB(t, withOpts(dbOpts))
	ctx := context.Background()

	// Append 100 float samples (ts 100..199), all carrying ST=50, to one series.
	const (
		mint = 100
		maxt = 200
	)
	app := db.AppenderV2(ctx)
	for ts := mint; ts < maxt; ts++ {
		_, err := app.Append(0, labels.FromStrings("a", "b"), 50, int64(ts), float64(ts), nil, nil, storage.AOptions{})
		require.NoError(t, err)
	}
	require.NoError(t, app.Commit())

	// Compact the head into a persisted block; exactly one block should exist.
	require.NoError(t, db.CompactHead(NewRangeHead(db.Head(), int64(mint), int64(maxt)-1)))
	require.Len(t, db.Blocks(), 1)
	block := db.Blocks()[0]

	chunkReader, err := block.Chunks()
	require.NoError(t, err)
	defer chunkReader.Close()
	indexReader, err := block.Index()
	require.NoError(t, err)
	defer indexReader.Close()

	postings, err := indexReader.Postings(ctx, "a", "b")
	require.NoError(t, err)

	// Every chunk of the matched series must use the XOR2 encoding.
	var (
		builder   labels.ScratchBuilder
		chks      []chunks.Meta
		numChunks int
	)
	for postings.Next() {
		chks = chks[:0]
		require.NoError(t, indexReader.Series(postings.At(), &builder, &chks))
		for _, meta := range chks {
			c, _, err := chunkReader.ChunkOrIterable(meta)
			require.NoError(t, err)
			require.Equal(t, chunkenc.EncXOR2, c.Encoding(),
				"unexpected chunk encoding, got %s", c.Encoding())
			numChunks++
		}
	}
	require.NoError(t, postings.Err())
	require.Positive(t, numChunks, "expected at least one chunk")
}
func TestNewCompactorFunc_AppendV2(t *testing.T) {
opts := DefaultOptions()
block1 := ulid.MustNew(1, nil)
@ -7520,3 +7594,111 @@ func TestNewCompactorFunc_AppendV2(t *testing.T) {
require.Len(t, ulids, 1)
require.Equal(t, block2, ulids[0])
}
// TestDBAppenderV2_STStorage_OutOfOrder verifies that ST storage works correctly
// when samples are appended out of order and can be queried using ChunkQuerier.
func TestDBAppenderV2_STStorage_OutOfOrder(t *testing.T) {
	h := tsdbutil.GenerateTestHistogram(1)
	h.CounterResetHint = histogram.NotCounterReset

	for _, tt := range []struct {
		name     string
		appended []chunks.Sample
		want     []chunks.Sample
	}{
		{
			name: "Float samples out of order",
			appended: []chunks.Sample{
				newSample(20, 200, 2.0, nil, nil), // Append second sample first.
				newSample(10, 100, 1.0, nil, nil), // Append first sample second (OOO).
				newSample(30, 300, 3.0, nil, nil), // Append third sample last.
				newSample(25, 250, 2.5, nil, nil), // Append middle sample (OOO).
			},
			want: []chunks.Sample{
				newSample(10, 100, 1.0, nil, nil),
				newSample(20, 200, 2.0, nil, nil),
				newSample(25, 250, 2.5, nil, nil),
				newSample(30, 300, 3.0, nil, nil),
			},
		},
		{
			name: "Histogram samples out of order",
			appended: []chunks.Sample{
				newSample(30, 300, 0, h, nil), // Append third sample first.
				newSample(10, 100, 0, h, nil), // Append first sample second (OOO).
				newSample(20, 200, 0, h, nil), // Append second sample last (OOO).
			},
			// Histograms don't support ST storage yet, should return 0 for ST.
			want: []chunks.Sample{
				newSample(0, 100, 0, h, nil),
				newSample(0, 200, 0, h, nil),
				newSample(0, 300, 0, h, nil),
			},
		},
		{
			name: "Mixed float samples with same ST",
			appended: []chunks.Sample{
				newSample(10, 200, 2.0, nil, nil),
				newSample(10, 100, 1.0, nil, nil), // OOO with same ST.
				newSample(10, 300, 3.0, nil, nil),
			},
			want: []chunks.Sample{
				newSample(10, 100, 1.0, nil, nil),
				newSample(10, 200, 2.0, nil, nil),
				newSample(10, 300, 3.0, nil, nil),
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			opts := DefaultOptions()
			opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds()
			opts.EnableSTStorage = true
			opts.EnableXOR2Encoding = true
			db := newTestDB(t, withOpts(opts))
			db.DisableCompactions()

			lbls := labels.FromStrings("foo", "bar")
			// Each sample goes through its own appender+commit so out-of-order
			// handling is exercised across commits.
			for _, s := range tt.appended {
				app := db.AppenderV2(context.Background())
				_, err := app.Append(0, lbls, s.ST(), s.T(), s.F(), s.H(), s.FH(), storage.AOptions{})
				require.NoError(t, err, "Appending OOO sample with ST should succeed")
				require.NoError(t, app.Commit(), "Committing OOO sample with ST should succeed")
			}

			querier, err := db.ChunkQuerier(math.MinInt64, math.MaxInt64)
			require.NoError(t, err)
			defer querier.Close()

			set := querier.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.True(t, set.Next(), "Should have series")
			series := set.At()
			require.NoError(t, set.Err())
			require.False(t, set.Next(), "Should have only one series")

			// Expand all chunks of the single series into a flat sample slice.
			var got []chunks.Sample
			chunkIt := series.Iterator(nil)
			for chunkIt.Next() {
				expanded, err := storage.ExpandSamples(chunkIt.At().Chunk.Iterator(nil), newSample)
				require.NoError(t, err)
				got = append(got, expanded...)
			}
			require.NoError(t, chunkIt.Err())

			// Use requireEqualSamplesIgnoreCounterResets to ignore histogram counter reset hints.
			requireEqualSamples(t, lbls.String(), tt.want, got, requireEqualSamplesIgnoreCounterResets)

			// Additionally verify ST values match expectations.
			require.Len(t, got, len(tt.want))
			for i, want := range tt.want {
				require.Equal(t, want.ST(), got[i].ST(), "Sample %d: ST should match", i)
			}
		})
	}
}

View File

@ -395,7 +395,7 @@ func TestDataNotAvailableAfterRollback(t *testing.T) {
require.NoError(t, err)
walSeriesCount += len(series)
case record.Samples:
case record.Samples, record.SamplesV2:
var samples []record.RefSample
samples, err = dec.Samples(rec, samples)
require.NoError(t, err)
@ -1170,24 +1170,25 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T) {
// We test both with few and many samples appended after series creation. If samples are < 120 then there's no
// mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL.
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation)
})
for _, enableSTStorage := range []bool{false, true} {
for _, numSamplesAfterSeriesCreation := range []int{1, 1000} {
for run := 1; run <= numRuns; run++ {
t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage=%v", numSamplesAfterSeriesCreation, run, enableSTStorage), func(t *testing.T) {
testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableSTStorage)
})
}
}
}
}
func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) {
func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableSTStorage bool) {
const numSeries = 1000
db := newTestDB(t)
db.DisableCompactions()
for seriesRef := 1; seriesRef <= numSeries; seriesRef++ {
// Log samples before the series is logged to the WAL.
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: enableSTStorage}
var samples []record.RefSample
for ts := range numSamplesBeforeSeriesCreation {
@ -1551,139 +1552,143 @@ func TestRetentionDurationMetric(t *testing.T) {
func TestSizeRetention(t *testing.T) {
t.Parallel()
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
opts := DefaultOptions()
opts.OutOfOrderTimeWindow = 100
db := newTestDB(t, withOpts(opts), withRngs(100))
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
blocks := []*BlockMeta{
{MinTime: 100, MaxTime: 200}, // Oldest block
{MinTime: 200, MaxTime: 300},
{MinTime: 300, MaxTime: 400},
{MinTime: 400, MaxTime: 500},
{MinTime: 500, MaxTime: 600}, // Newest Block
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
for _, m := range blocks {
createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime))
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
headBlocks := []*BlockMeta{
{MinTime: 700, MaxTime: 800},
}
// Add some data to the WAL.
headApp := db.Head().Appender(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), tim, v)
// Add some data to the WAL.
headApp := db.Head().Appender(context.Background())
var aSeries labels.Labels
var it chunkenc.Iterator
for _, m := range headBlocks {
series := genSeries(100, 10, m.MinTime, m.MaxTime+1)
for _, s := range series {
aSeries = s.Labels()
it = s.Iterator(it)
for it.Next() == chunkenc.ValFloat {
tim, v := it.At()
_, err := headApp.Append(0, s.Labels(), tim, v)
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableSTStorage)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().Appender(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, ts, float64(ts))
require.NoError(t, err)
}
require.NoError(t, it.Err())
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
})
}
require.NoError(t, headApp.Commit())
db.Head().mmapHeadChunks()
require.Eventually(t, func() bool {
return db.Head().chunkDiskMapper.IsQueueEmpty()
}, 2*time.Second, 100*time.Millisecond)
// Test that registered size matches the actual disk size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered.
blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err := db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err := db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + Head
// chunks size
expSize := blockSize + walSize + cdmSize
actSize, err := fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Create a WAL checkpoint, and compare sizes.
first, last, err := wlog.Segments(db.Head().wal.Dir())
require.NoError(t, err)
_, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0)
require.NoError(t, err)
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics.
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Truncate Chunk Disk Mapper and compare sizes.
require.NoError(t, db.Head().chunkDiskMapper.Truncate(900))
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
expSize = blockSize + walSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Add some out of order samples to check the size of WBL.
headApp = db.Head().Appender(context.Background())
for ts := int64(750); ts < 800; ts++ {
_, err := headApp.Append(0, aSeries, ts, float64(ts))
require.NoError(t, err)
}
require.NoError(t, headApp.Commit())
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
wblSize, err := db.Head().wbl.Size()
require.NoError(t, err)
require.NotZero(t, wblSize)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
expSize = blockSize + walSize + wblSize + cdmSize
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size")
// Decrease the max bytes limit so that a delete is triggered.
// Check total size, total count and check that the oldest block was deleted.
firstBlockSize := db.Blocks()[0].Size()
sizeLimit := actSize - firstBlockSize
db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size.
require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size.
expBlocks := blocks[1:]
actBlocks := db.Blocks()
blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes))
walSize, err = db.Head().wal.Size()
require.NoError(t, err)
cdmSize, err = db.Head().chunkDiskMapper.Size()
require.NoError(t, err)
require.NotZero(t, cdmSize)
// Expected size should take into account block size + WAL size + WBL size
expSize = blockSize + walSize + wblSize + cdmSize
actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount))
actSize, err = fileutil.DirSize(db.Dir())
require.NoError(t, err)
require.Equal(t, 1, actRetentionCount, "metric retention count mismatch")
require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size")
require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit)
require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1)
require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block")
require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block")
}
func TestSizeRetentionMetric(t *testing.T) {
@ -1743,7 +1748,7 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
StorageConfig: config.StorageConfig{
TSDBConfig: &config.TSDBConfig{
Retention: &config.TSDBRetentionConfig{
Time: model.Duration(shorterRetentionDuration),
Time: model.Duration(time.Duration(shorterRetentionDuration) * time.Millisecond),
},
},
},
@ -1772,6 +1777,31 @@ func TestRuntimeRetentionConfigChange(t *testing.T) {
require.Positive(t, int(prom_testutil.ToFloat64(db.metrics.timeRetentionCount)), "time retention count should be incremented")
}
// TestApplyConfigRetentionDurationMetricUnit verifies that after a config
// reload the prometheus_tsdb_retention_limit_seconds metric reports the
// retention in seconds.
func TestApplyConfigRetentionDurationMetricUnit(t *testing.T) {
	retentionMs := time.Hour.Milliseconds()
	db := newTestDB(t, withOpts(&Options{RetentionDuration: retentionMs}))

	newCfg := &config.Config{
		StorageConfig: config.StorageConfig{
			TSDBConfig: &config.TSDBConfig{
				Retention: &config.TSDBRetentionConfig{
					Time: model.Duration(time.Hour),
				},
			},
		},
	}
	require.NoError(t, db.ApplyConfig(newCfg))

	// The internally tracked retention stays in milliseconds...
	require.Equal(t, retentionMs, db.getRetentionDuration())
	// ...while the exported metric reports seconds.
	require.Equal(t, time.Hour.Seconds(), prom_testutil.ToFloat64(db.metrics.retentionDuration))
}
func TestNotMatcherSelectsLabelsUnsetSeries(t *testing.T) {
db := newTestDB(t)
@ -2072,33 +2102,36 @@ func TestInitializeHeadTimestamp(t *testing.T) {
require.Equal(t, int64(1000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("wal-only", func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
for _, enableSTStorage := range []bool{false, true} {
t.Run("wal-only-st-"+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(5000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
}
t.Run("existing-block", func(t *testing.T) {
dir := t.TempDir()
@ -2110,37 +2143,40 @@ func TestInitializeHeadTimestamp(t *testing.T) {
require.Equal(t, int64(2000), db.head.MaxTime())
require.True(t, db.head.initialized())
})
t.Run("existing-block-and-wal", func(t *testing.T) {
dir := t.TempDir()
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
for _, enableSTStorage := range []bool{false, true} {
t.Run("existing-block-and-wal,enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
createBlock(t, dir, genSeries(1, 1, 1000, 6000))
var enc record.Encoder
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777))
w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None)
require.NoError(t, err)
db := newTestDB(t, withDir(dir))
enc := record.Encoder{EnableSTStorage: enableSTStorage}
err = w.Log(
enc.Series([]record.RefSeries{
{Ref: 123, Labels: labels.FromStrings("a", "1")},
{Ref: 124, Labels: labels.FromStrings("a", "2")},
}, nil),
enc.Samples([]record.RefSample{
{Ref: 123, T: 5000, V: 1},
{Ref: 124, T: 15000, V: 1},
}, nil),
)
require.NoError(t, err)
require.NoError(t, w.Close())
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
db := newTestDB(t, withDir(dir))
require.Equal(t, int64(6000), db.head.MinTime())
require.Equal(t, int64(15000), db.head.MaxTime())
require.True(t, db.head.initialized())
// Check that old series has been GCed.
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series))
})
}
}
func TestNoEmptyBlocks(t *testing.T) {
@ -4523,7 +4559,7 @@ func testOOOWALWrite(t *testing.T,
series, err := dec.Series(rec, nil)
require.NoError(t, err)
records = append(records, series)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
records = append(records, samples)
@ -4684,102 +4720,106 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) {
require.NoError(t, err)
}
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
ctx := context.Background()
numSamples := 10000
hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false)
// Add some series so we can append metadata to them.
app := hb.Appender(ctx)
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
// Add some series so we can append metadata to them.
app := hb.Appender(ctx)
s1 := labels.FromStrings("a", "b")
s2 := labels.FromStrings("c", "d")
s3 := labels.FromStrings("e", "f")
s4 := labels.FromStrings("g", "h")
for _, s := range []labels.Labels{s1, s2, s3, s4} {
_, err := app.Append(0, s, 0, 0)
require.NoError(t, err)
for _, s := range []labels.Labels{s1, s2, s3, s4} {
_, err := app.Append(0, s, 0, 0)
require.NoError(t, err)
}
require.NoError(t, app.Commit())
// Add a first round of metadata to the first three series.
// Re-take the Appender, as the previous Commit will have it closed.
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m1)
updateMetadata(t, app, s2, m2)
updateMetadata(t, app, s3, m3)
updateMetadata(t, app, s4, m4)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m5)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableSTStorage)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
})
}
require.NoError(t, app.Commit())
// Add a first round of metadata to the first three series.
// Re-take the Appender, as the previous Commit will have it closed.
m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"}
m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"}
m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"}
m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m1)
updateMetadata(t, app, s2, m2)
updateMetadata(t, app, s3, m3)
updateMetadata(t, app, s4, m4)
require.NoError(t, app.Commit())
// Update metadata for first series.
m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"}
app = hb.Appender(ctx)
updateMetadata(t, app, s1, m5)
require.NoError(t, app.Commit())
// Switch back-and-forth metadata for second series.
// Since it ended on a new metadata record, we expect a single new entry.
m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"}
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m2)
require.NoError(t, app.Commit())
app = hb.Appender(ctx)
updateMetadata(t, app, s2, m6)
require.NoError(t, app.Commit())
// Let's create a checkpoint.
first, last, err := wlog.Segments(w.Dir())
require.NoError(t, err)
keep := func(id chunks.HeadSeriesRef) bool {
return id != 3
}
_, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0)
require.NoError(t, err)
// Confirm there's been a checkpoint.
cdir, _, err := wlog.LastCheckpoint(w.Dir())
require.NoError(t, err)
// Read in checkpoint and WAL.
recs := readTestWAL(t, cdir)
var gotMetadataBlocks [][]record.RefMetadata
for _, rec := range recs {
if mr, ok := rec.([]record.RefMetadata); ok {
gotMetadataBlocks = append(gotMetadataBlocks, mr)
}
}
// There should only be 1 metadata block present, with only the latest
// metadata kept around.
wantMetadata := []record.RefMetadata{
{Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help},
{Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help},
{Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help},
}
require.Len(t, gotMetadataBlocks, 1)
require.Len(t, gotMetadataBlocks[0], 3)
gotMetadataBlock := gotMetadataBlocks[0]
sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref })
require.Equal(t, wantMetadata, gotMetadataBlock)
require.NoError(t, hb.Close())
}
func TestMetadataAssertInMemoryData(t *testing.T) {

View File

@ -65,6 +65,96 @@ Notes:
* `padding` of 0 to 7 bits so that the whole chunk data is byte-aligned.
* The chunk can have as few as one sample, i.e. `ts_1`, `v_1`, etc. are optional.
## XOR2 chunk data
XOR2 uses the same structure as XOR for samples 0 and 1. Starting from sample 2,
a joint control prefix encodes both the timestamp delta-of-delta (dod) and whether
the value changed, with common dod cases byte-aligned for efficient writing.
XOR2 can also optionally encode the start timestamp (ST); see details further
down.
```
┌──────────────────────┬───────────────────┬───────────────┬───────────────┬────────────────┬─-
│ num_samples <uint16> │ st_header <uint8> | ts_0 <varint> │ v_0 <float64> │ ?st_0 <varint> |
└──────────────────────┴───────────────────┴───────────────┴───────────────┴────────────────┴─-
-─────────────────────┬───────────────────────┬─────────────────────────┬─-
ts_1_delta <uvarint> │ v_1_xor <varbit_xor2> │ ?st_1_delta <varbit_ts> |
-─────────────────────┴───────────────────────┴─────────────────────────┴─-
-─────────────────────────┬───────────────────────┬─────┬─-
sample_2 <joint_sample2> │ ?st_2_dod <varbit_ts> | ... │
-─────────────────────────┴───────────────────────┴─────┴─-
-─────────────────────────┬───────────────────────┬──────────────────┐
sample_n <joint_sample2> │ ?st_n_dod <varbit_ts> | padding <x bits>
-─────────────────────────┴───────────────────────┴──────────────────┘
```
### Joint sample encoding for n >= 2 (`<joint_sample2>`):
Each sample starts with a variable-length control prefix that jointly encodes the
dod and value change status:
| Control prefix | dod | Value encoding that follows |
|---|---|---|
| `0` | 0 | (none, value unchanged) |
| `10` | 0 | `<varbit_xor2_nn>` (value known non-zero and non-stale) |
| `110DDDDD` `DDDDDDDD` | 13-bit signed [-4096, 4095] | `<varbit_xor2>` |
| `1110DDDD` `DDDDDDDD` `DDDDDDDD` | 20-bit signed [-524288, 524287] | `<varbit_xor2>` |
| `11110` + 64-bit dod | exact | `<varbit_xor2>` |
| `11111` | 0 | (none, stale NaN — no value field) |
The `110` and `1110` cases pack the prefix and the most-significant dod bits into
the first byte, making the full dod field byte-aligned.
### Value delta encoding (`<varbit_xor2>`):
Used after the dod≠0 control prefixes. The XOR of the current and previous value is encoded as:
| Prefix | Meaning |
|---|---|
| `0` | XOR = 0 (value unchanged) |
| `10` | Reuse previous leading/trailing window; `sigbits` value bits follow |
| `110` + leading(5) + sigbits(6) + value(sigbits) | New leading/trailing window |
| `111` | Stale NaN marker (3 bits) |
### Value delta encoding, known non-zero (`<varbit_xor2_nn>`):
Used after the `10` control prefix (dod=0, value known to have changed and be non-stale).
The delta=0 check is skipped, saving one bit on the reuse path:
| Prefix | Meaning |
|---|---|
| `0` | Reuse previous leading/trailing window; `sigbits` value bits follow |
| `1` + leading(5) + sigbits(6) + value(sigbits) | New leading/trailing window |
### Start timestamp encoding
* We use `st_i_dod` and `st_i` interchangeably when `i>1` in these notes.
* `st_header` is one byte:
```
┌───────────────────────┬───────────────────────┐
│ first_st_known<1 bit> │ st_changed_on<7 bits> │
└───────────────────────┴───────────────────────┘
```
where the highest bit `first_st_known` indicates if `st_0` is present or not.
If the lower 7 bits `st_changed_on` are 0, no `st_i (i>0)` is present.
Otherwise `st_i (i>=st_changed_on)` is present, while
`st_i (0<i<st_changed_on)` is not present.
Due to the 7-bit limitation, once a chunk has at least 127 samples,
`st_changed_on` is set to 127 (0x7F) and the 127th and further samples will
have `st_i` present.
* `st_0` is encoded as a `varint` if present.
* `st_1` is encoded as a `varbit_ts` delta from `st_0` (or from 0 if `st_0` is
not present).
* `st_i_dod` aka `st_i (i>1)` is encoded as a `varbit_ts` "delta of delta" from
`st_i-1` (or from 0 if `st_i-1` is not present).
## Histogram chunk data
```

View File

@ -161,6 +161,15 @@ type HeadOptions struct {
OutOfOrderTimeWindow atomic.Int64
OutOfOrderCapMax atomic.Int64
// EnableSTStorage determines whether databases (WAL/WBL, tsdb,
// agent) should set a Start Time value per sample.
// Represents 'st-storage' feature flag.
EnableSTStorage atomic.Bool
// EnableXOR2Encoding enables XOR2 chunk encoding for float samples.
// Represents 'xor2-encoding' feature flag.
EnableXOR2Encoding atomic.Bool
ChunkRange int64
// ChunkDirRoot is the parent directory of the chunks directory.
ChunkDirRoot string
@ -1382,7 +1391,7 @@ func (h *Head) truncateWAL(mint int64) error {
}
h.metrics.checkpointCreationTotal.Inc()
if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint); err != nil {
if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint, h.opts.EnableSTStorage.Load()); err != nil {
h.metrics.checkpointCreationFail.Inc()
var cerr *chunks.CorruptionErr
if errors.As(err, &cerr) {
@ -1676,7 +1685,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match
}
if h.wal != nil {
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage.Load()}
if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil {
return err
}

View File

@ -185,6 +185,8 @@ func (h *Head) appender() *headAppender {
typesInBatch: h.getTypeMap(),
appendID: appendID,
cleanupAppendIDsBelow: cleanupAppendIDsBelow,
storeST: h.opts.EnableSTStorage.Load(),
useXOR2: h.opts.EnableXOR2Encoding.Load(),
},
}
}
@ -412,6 +414,8 @@ type headAppenderBase struct {
appendID, cleanupAppendIDsBelow uint64
closed bool
storeST bool
useXOR2 bool
}
type headAppender struct {
headAppenderBase
@ -1059,7 +1063,7 @@ func (a *headAppenderBase) log() error {
defer func() { a.head.putBytesBuffer(buf) }()
var rec []byte
var enc record.Encoder
enc := record.Encoder{EnableSTStorage: a.storeST}
if len(a.seriesRefs) > 0 {
rec = enc.Series(a.seriesRefs, buf)
@ -1168,6 +1172,7 @@ type appenderCommitContext struct {
histoOOBRejected int
inOrderMint int64
inOrderMaxt int64
appendChunkOpts chunkOpts
oooMinT int64
oooMaxT int64
wblSamples []record.RefSample
@ -1177,8 +1182,7 @@ type appenderCommitContext struct {
oooMmapMarkersCount int
oooRecords [][]byte
oooCapMax int64
appendChunkOpts chunkOpts
enc record.Encoder
oooEnc record.Encoder
}
// commitExemplars adds all exemplars from the provided batch to the head's exemplar storage.
@ -1228,31 +1232,31 @@ func (acc *appenderCommitContext) collectOOORecords(a *headAppenderBase) {
})
}
}
r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer())
r := acc.oooEnc.MmapMarkers(markers, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
if len(acc.wblSamples) > 0 {
r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer())
r := acc.oooEnc.Samples(acc.wblSamples, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
if len(acc.wblHistograms) > 0 {
r, customBucketsHistograms := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer())
r, customBucketsHistograms := acc.oooEnc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer())
if len(r) > 0 {
acc.oooRecords = append(acc.oooRecords, r)
}
if len(customBucketsHistograms) > 0 {
r := acc.enc.CustomBucketsHistogramSamples(customBucketsHistograms, a.head.getBytesBuffer())
r := acc.oooEnc.CustomBucketsHistogramSamples(customBucketsHistograms, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
}
if len(acc.wblFloatHistograms) > 0 {
r, customBucketsFloatHistograms := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer())
r, customBucketsFloatHistograms := acc.oooEnc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer())
if len(r) > 0 {
acc.oooRecords = append(acc.oooRecords, r)
}
if len(customBucketsFloatHistograms) > 0 {
r := acc.enc.CustomBucketsFloatHistogramSamples(customBucketsFloatHistograms, a.head.getBytesBuffer())
r := acc.oooEnc.CustomBucketsFloatHistogramSamples(customBucketsFloatHistograms, a.head.getBytesBuffer())
acc.oooRecords = append(acc.oooRecords, r)
}
}
@ -1387,7 +1391,7 @@ func (a *headAppenderBase) commitFloats(b *appendBatch, acc *appenderCommitConte
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
ok, chunkCreated, mmapRefs = series.insert(s.ST, s.T, s.V, nil, nil, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1431,7 +1435,7 @@ func (a *headAppenderBase) commitFloats(b *appendBatch, acc *appenderCommitConte
default:
newlyStale := !value.IsStaleNaN(series.lastValue) && value.IsStaleNaN(s.V)
staleToNonStale := value.IsStaleNaN(series.lastValue) && !value.IsStaleNaN(s.V)
ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts)
ok, chunkCreated = series.append(s.ST, s.T, s.V, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1492,7 +1496,8 @@ func (a *headAppenderBase) commitHistograms(b *appendBatch, acc *appenderCommitC
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
// TODO(krajorama,ywwg): Pass ST when available in WAL.
ok, chunkCreated, mmapRefs = series.insert(0, s.T, 0, s.H, nil, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1540,7 +1545,8 @@ func (a *headAppenderBase) commitHistograms(b *appendBatch, acc *appenderCommitC
newlyStale = newlyStale && !value.IsStaleNaN(series.lastHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(series.lastHistogramValue.Sum) && !value.IsStaleNaN(s.H.Sum)
}
ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts)
// TODO(krajorama,ywwg): pass ST when available in WAL.
ok, chunkCreated = series.appendHistogram(0, s.T, s.H, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1601,7 +1607,8 @@ func (a *headAppenderBase) commitFloatHistograms(b *appendBatch, acc *appenderCo
// Sample is OOO and OOO handling is enabled
// and the delta is within the OOO tolerance.
var mmapRefs []chunks.ChunkDiskMapperRef
ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
// TODO(krajorama,ywwg): Pass ST when available in WAL.
ok, chunkCreated, mmapRefs = series.insert(0, s.T, 0, nil, s.FH, acc.appendChunkOpts, acc.oooCapMax, a.head.logger)
if chunkCreated {
r, ok := acc.oooMmapMarkers[series.ref]
if !ok || r != nil {
@ -1649,7 +1656,8 @@ func (a *headAppenderBase) commitFloatHistograms(b *appendBatch, acc *appenderCo
newlyStale = newlyStale && !value.IsStaleNaN(series.lastFloatHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(series.lastFloatHistogramValue.Sum) && !value.IsStaleNaN(s.FH.Sum)
}
ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts)
// TODO(krajorama,ywwg): pass ST when available in WAL.
ok, chunkCreated = series.appendFloatHistogram(0, s.T, s.FH, a.appendID, acc.appendChunkOpts)
if ok {
if s.T < acc.inOrderMint {
acc.inOrderMint = s.T
@ -1741,6 +1749,10 @@ func (a *headAppenderBase) Commit() (err error) {
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: a.useXOR2,
},
oooEnc: record.Encoder{
EnableSTStorage: a.storeST,
},
}
@ -1796,18 +1808,18 @@ func (a *headAppenderBase) Commit() (err error) {
}
// insert is like append, except it inserts. Used for OOO samples.
func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
func (s *memSeries) insert(st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, o chunkOpts, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
if s.ooo == nil {
s.ooo = &memSeriesOOOFields{}
}
c := s.ooo.oooHeadChunk
if c == nil || c.chunk.NumSamples() == int(oooCapMax) {
// Note: If no new samples come in then we rely on compaction to clean up stale in-memory OOO chunks.
c, mmapRefs = s.cutNewOOOHeadChunk(t, chunkDiskMapper, logger)
c, mmapRefs = s.cutNewOOOHeadChunk(t, o, logger)
chunkCreated = true
}
ok := c.chunk.Insert(t, v, h, fh)
ok := c.chunk.Insert(st, t, v, h, fh)
if ok {
if chunkCreated || t < c.minTime {
c.minTime = t
@ -1824,19 +1836,19 @@ type chunkOpts struct {
chunkDiskMapper *chunks.ChunkDiskMapper
chunkRange int64
samplesPerChunk int
useXOR2 bool // Selects XOR2 encoding for float chunks.
}
// append adds the sample (t, v) to the series. The caller also has to provide
// the appendID for isolation. (The appendID can be zero, which results in no
// isolation for this append.)
// Series lock must be held when calling.
func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.EncXOR, o)
func (s *memSeries) append(st, t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.ValFloat.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
// TODO(krajorama): pass ST.
s.app.Append(0, t, v)
s.app.Append(st, t, v)
c.maxTime = t
@ -1856,14 +1868,14 @@ func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sa
// In case of recoding the existing chunk, a new chunk is allocated and the old chunk is dropped.
// To keep the meaning of prometheus_tsdb_head_chunks and prometheus_tsdb_head_chunks_created_total
// consistent, we return chunkCreated=false in this case.
func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
func (s *memSeries) appendHistogram(st, t int64, h *histogram.Histogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
// Head controls the execution of recoding, so that we own the proper
// chunk reference afterwards and mmap used up chunks.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevApp, _ := s.app.(*chunkenc.HistogramAppender)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.EncHistogram, o)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.ValHistogram.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
@ -1878,8 +1890,7 @@ func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID ui
prevApp = nil
}
// TODO(krajorama): pass ST.
newChunk, recoded, s.app, _ = s.app.AppendHistogram(prevApp, 0, t, h, false) // false=request a new chunk if needed
newChunk, recoded, s.app, _ = s.app.AppendHistogram(prevApp, st, t, h, false) // false=request a new chunk if needed
s.lastHistogramValue = h
s.lastFloatHistogramValue = nil
@ -1914,14 +1925,14 @@ func (s *memSeries) appendHistogram(t int64, h *histogram.Histogram, appendID ui
// In case of recoding the existing chunk, a new chunk is allocated and the old chunk is dropped.
// To keep the meaning of prometheus_tsdb_head_chunks and prometheus_tsdb_head_chunks_created_total
// consistent, we return chunkCreated=false in this case.
func (s *memSeries) appendFloatHistogram(t int64, fh *histogram.FloatHistogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
func (s *memSeries) appendFloatHistogram(st, t int64, fh *histogram.FloatHistogram, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) {
// Head controls the execution of recoding, so that we own the proper
// chunk reference afterwards and mmap used up chunks.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevApp, _ := s.app.(*chunkenc.FloatHistogramAppender)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.EncFloatHistogram, o)
c, sampleInOrder, chunkCreated := s.histogramsAppendPreprocessor(t, chunkenc.ValFloatHistogram.ChunkEncoding(o.useXOR2), o)
if !sampleInOrder {
return sampleInOrder, chunkCreated
}
@ -1936,8 +1947,7 @@ func (s *memSeries) appendFloatHistogram(t int64, fh *histogram.FloatHistogram,
prevApp = nil
}
// TODO(krajorama): pass ST.
newChunk, recoded, s.app, _ = s.app.AppendFloatHistogram(prevApp, 0, t, fh, false) // False means request a new chunk if needed.
newChunk, recoded, s.app, _ = s.app.AppendFloatHistogram(prevApp, st, t, fh, false) // False means request a new chunk if needed.
s.lastHistogramValue = nil
s.lastFloatHistogramValue = fh
@ -2161,8 +2171,8 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange
// cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk.
// The caller must ensure that s is locked and s.ooo is not nil.
func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger)
func (s *memSeries) cutNewOOOHeadChunk(mint int64, o chunkOpts, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
ref := s.mmapCurrentOOOHeadChunk(o, logger)
s.ooo.oooHeadChunk = &oooHeadChunk{
chunk: NewOOOChunk(),
@ -2174,12 +2184,12 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk
}
// s must be locked when calling.
func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) []chunks.ChunkDiskMapperRef {
func (s *memSeries) mmapCurrentOOOHeadChunk(o chunkOpts, logger *slog.Logger) []chunks.ChunkDiskMapperRef {
if s.ooo == nil || s.ooo.oooHeadChunk == nil {
// OOO is not enabled or there is no head chunk, so nothing to m-map here.
return nil
}
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, o.useXOR2)
if err != nil {
handleChunkWriteError(err)
return nil
@ -2190,7 +2200,7 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMap
logger.Error("Too many OOO chunks, dropping data", "series", s.lset.String())
break
}
chunkRef := chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
chunkRef := o.chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
chunkRefs = append(chunkRefs, chunkRef)
s.ooo.oooMmappedChunks = append(s.ooo.oooMmappedChunks, &mmappedChunk{
ref: chunkRef,

View File

@ -95,6 +95,8 @@ func (h *Head) appenderV2() *headAppenderV2 {
typesInBatch: h.getTypeMap(),
appendID: appendID,
cleanupAppendIDsBelow: cleanupAppendIDsBelow,
storeST: h.opts.EnableSTStorage.Load(),
useXOR2: h.opts.EnableXOR2Encoding.Load(),
},
}
}
@ -140,7 +142,6 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
}
}
// TODO(bwplotka): Handle ST natively (as per PROM-60).
if a.head.opts.EnableSTAsZeroSample && st != 0 {
a.bestEffortAppendSTZeroSample(s, ls, st, t, h, fh)
}
@ -177,7 +178,7 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
// we do not need to check for the difference between "unknown
// series" and "known series with stNone".
}
appErr = a.appendFloat(s, t, v, opts.RejectOutOfOrder)
appErr = a.appendFloat(s, st, t, v, opts.RejectOutOfOrder)
}
// Handle append error, if any.
if appErr != nil {
@ -218,7 +219,7 @@ func (a *headAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t i
return storage.SeriesRef(s.ref), partialErr
}
func (a *headAppenderV2) appendFloat(s *memSeries, t int64, v float64, fastRejectOOO bool) error {
func (a *headAppenderV2) appendFloat(s *memSeries, st, t int64, v float64, fastRejectOOO bool) error {
s.Lock()
// TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise
// to skip that sample from the WAL and write only in the WBL.
@ -239,7 +240,7 @@ func (a *headAppenderV2) appendFloat(s *memSeries, t int64, v float64, fastRejec
}
b := a.getCurrentBatch(stFloat, s.ref)
b.floats = append(b.floats, record.RefSample{Ref: s.ref, T: t, V: v})
b.floats = append(b.floats, record.RefSample{Ref: s.ref, ST: st, T: t, V: v})
b.floatSeries = append(b.floatSeries, s)
return nil
}
@ -366,7 +367,7 @@ func (a *headAppenderV2) bestEffortAppendSTZeroSample(s *memSeries, ls labels.La
}
err = a.appendHistogram(s, st, zeroHistogram, true)
default:
err = a.appendFloat(s, st, 0, true)
err = a.appendFloat(s, 0, st, 0, true)
}
if err != nil {

View File

@ -1865,296 +1865,300 @@ func TestHistogramInWALAndMmapChunk_AppenderV2(t *testing.T) {
}
func TestChunkSnapshot_AppenderV2(t *testing.T) {
head, _ := newTestHead(t, 120*4, compression.None, false)
defer func() {
head.opts.EnableMemorySnapshotOnShutdown = false
require.NoError(t, head.Close())
}()
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
head, _ := newTestHead(t, 120*4, compression.None, false)
defer func() {
head.opts.EnableMemorySnapshotOnShutdown = false
require.NoError(t, head.Close())
}()
type ex struct {
seriesLabels labels.Labels
e exemplar.Exemplar
}
numSeries := 10
expSeries := make(map[string][]chunks.Sample)
expHist := make(map[string][]chunks.Sample)
expFloatHist := make(map[string][]chunks.Sample)
expTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
expExemplars := make([]ex, 0)
histograms := tsdbutil.GenerateTestGaugeHistograms(481)
floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481)
newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar {
e := ex{
seriesLabels: lbls,
e: exemplar.Exemplar{
Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())),
Value: rand.Float64(),
Ts: ts,
},
}
expExemplars = append(expExemplars, e)
return e.e
}
checkSamples := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
require.Equal(t, expSeries, series)
}
checkHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*"))
require.Equal(t, expHist, series)
}
checkFloatHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*"))
require.Equal(t, expFloatHist, series)
}
checkTombstones := func() {
tr, err := head.Tombstones()
require.NoError(t, err)
actTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error {
for _, itv := range itvs {
actTombstones[ref].Add(itv)
type ex struct {
seriesLabels labels.Labels
e exemplar.Exemplar
}
numSeries := 10
expSeries := make(map[string][]chunks.Sample)
expHist := make(map[string][]chunks.Sample)
expFloatHist := make(map[string][]chunks.Sample)
expTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
expExemplars := make([]ex, 0)
histograms := tsdbutil.GenerateTestGaugeHistograms(481)
floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481)
newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar {
e := ex{
seriesLabels: lbls,
e: exemplar.Exemplar{
Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())),
Value: rand.Float64(),
Ts: ts,
},
}
expExemplars = append(expExemplars, e)
return e.e
}
checkSamples := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
require.Equal(t, expSeries, series)
}
checkHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*"))
require.Equal(t, expHist, series)
}
checkFloatHistograms := func() {
q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64)
require.NoError(t, err)
series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*"))
require.Equal(t, expFloatHist, series)
}
checkTombstones := func() {
tr, err := head.Tombstones()
require.NoError(t, err)
actTombstones := make(map[storage.SeriesRef]tombstones.Intervals)
require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error {
for _, itv := range itvs {
actTombstones[ref].Add(itv)
}
return nil
}))
require.Equal(t, expTombstones, actTombstones)
}
checkExemplars := func() {
actExemplars := make([]ex, 0, len(expExemplars))
err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error {
actExemplars = append(actExemplars, ex{
seriesLabels: seriesLabels,
e: e,
})
return nil
})
require.NoError(t, err)
// Verifies both existence of right exemplars and order of exemplars in the buffer.
testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})})
}
var (
wlast, woffset int
err error
)
closeHeadAndCheckSnapshot := func() {
require.NoError(t, head.Close())
_, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot)
require.NoError(t, err)
require.Equal(t, wlast, sidx)
require.Equal(t, woffset, soffset)
}
openHeadAndCheckReplay := func() {
w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None)
require.NoError(t, err)
head, err = NewHead(nil, nil, w, nil, head.opts, nil)
require.NoError(t, err)
require.NoError(t, head.Init(math.MinInt64))
checkSamples()
checkHistograms()
checkFloatHistograms()
checkTombstones()
checkExemplars()
}
{ // Initial data that goes into snapshot.
// Add some initial samples with >=1 m-map chunk.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(1); ts <= 240; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add some tombstones.
enc := record.Encoder{EnableSTStorage: enableSTStorage}
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 1234, Maxt: 2345},
{Mint: 3456, Maxt: 4567},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
// These references should be the ones used for the snapshot.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Creating snapshot and verifying it.
head.opts.EnableMemorySnapshotOnShutdown = true
closeHeadAndCheckSnapshot() // This will create a snapshot.
// Test the replay of snapshot.
openHeadAndCheckReplay()
}
{ // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk.
// Add more samples.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(241); ts <= 480; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add more tombstones.
enc := record.Encoder{EnableSTStorage: enableSTStorage}
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 12345, Maxt: 23456},
{Mint: 34567, Maxt: 45678},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
{
// Close Head and verify that new snapshot was not created.
head.opts.EnableMemorySnapshotOnShutdown = false
closeHeadAndCheckSnapshot() // This should not create a snapshot.
// Test the replay of snapshot, m-map chunks, and WAL.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
openHeadAndCheckReplay()
}
// Creating another snapshot should delete the older snapshot and replay still works fine.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Close Head and verify that new snapshot was created.
closeHeadAndCheckSnapshot()
// Verify that there is only 1 snapshot.
files, err := os.ReadDir(head.opts.ChunkDirRoot)
require.NoError(t, err)
snapshots := 0
for i := len(files) - 1; i >= 0; i-- {
fi := files[i]
if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) {
snapshots++
require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name())
}
}
require.Equal(t, 1, snapshots)
// Test the replay of snapshot.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
// Disabling exemplars to check that it does not hard fail replay
// https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870.
head.opts.EnableExemplarStorage = false
head.opts.MaxExemplars.Store(0)
expExemplars = expExemplars[:0]
openHeadAndCheckReplay()
require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal))
}
return nil
}))
require.Equal(t, expTombstones, actTombstones)
}
checkExemplars := func() {
actExemplars := make([]ex, 0, len(expExemplars))
err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error {
actExemplars = append(actExemplars, ex{
seriesLabels: seriesLabels,
e: e,
})
return nil
})
require.NoError(t, err)
// Verifies both existence of right exemplars and order of exemplars in the buffer.
testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})})
}
var (
wlast, woffset int
err error
)
closeHeadAndCheckSnapshot := func() {
require.NoError(t, head.Close())
_, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot)
require.NoError(t, err)
require.Equal(t, wlast, sidx)
require.Equal(t, woffset, soffset)
}
openHeadAndCheckReplay := func() {
w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None)
require.NoError(t, err)
head, err = NewHead(nil, nil, w, nil, head.opts, nil)
require.NoError(t, err)
require.NoError(t, head.Init(math.MinInt64))
checkSamples()
checkHistograms()
checkFloatHistograms()
checkTombstones()
checkExemplars()
}
{ // Initial data that goes into snapshot.
// Add some initial samples with >=1 m-map chunk.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(1); ts <= 240; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add some tombstones.
var enc record.Encoder
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 1234, Maxt: 2345},
{Mint: 3456, Maxt: 4567},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
// These references should be the ones used for the snapshot.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Creating snapshot and verifying it.
head.opts.EnableMemorySnapshotOnShutdown = true
closeHeadAndCheckSnapshot() // This will create a snapshot.
// Test the replay of snapshot.
openHeadAndCheckReplay()
}
{ // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk.
// Add more samples.
app := head.AppenderV2(context.Background())
for i := 1; i <= numSeries; i++ {
lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i))
lblStr := lbls.String()
lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i))
lblsHistStr := lblsHist.String()
lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i))
lblsFloatHistStr := lblsFloatHist.String()
// 240 samples should m-map at least 1 chunk.
for ts := int64(241); ts <= 480; ts++ {
// Add an exemplar, but only to float sample.
aOpts := storage.AOptions{}
if ts%10 == 0 {
aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)}
}
val := rand.Float64()
expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil})
_, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts)
require.NoError(t, err)
hist := histograms[int(ts)]
expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil})
_, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{})
require.NoError(t, err)
floatHist := floatHistogram[int(ts)]
expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist})
_, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{})
require.NoError(t, err)
// Create multiple WAL records (commit).
if ts%10 == 0 {
require.NoError(t, app.Commit())
app = head.AppenderV2(context.Background())
}
}
}
require.NoError(t, app.Commit())
// Add more tombstones.
var enc record.Encoder
for i := 1; i <= numSeries; i++ {
ref := storage.SeriesRef(i)
itvs := tombstones.Intervals{
{Mint: 12345, Maxt: 23456},
{Mint: 34567, Maxt: 45678},
}
for _, itv := range itvs {
expTombstones[ref].Add(itv)
}
head.tombstones.AddInterval(ref, itvs...)
err := head.wal.Log(enc.Tombstones([]tombstones.Stone{
{Ref: ref, Intervals: itvs},
}, nil))
require.NoError(t, err)
}
}
{
// Close Head and verify that new snapshot was not created.
head.opts.EnableMemorySnapshotOnShutdown = false
closeHeadAndCheckSnapshot() // This should not create a snapshot.
// Test the replay of snapshot, m-map chunks, and WAL.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
openHeadAndCheckReplay()
}
// Creating another snapshot should delete the older snapshot and replay still works fine.
wlast, woffset, err = head.wal.LastSegmentAndOffset()
require.NoError(t, err)
if woffset != 0 && woffset < 32*1024 {
// The page is always filled before taking the snapshot.
woffset = 32 * 1024
}
{
// Close Head and verify that new snapshot was created.
closeHeadAndCheckSnapshot()
// Verify that there is only 1 snapshot.
files, err := os.ReadDir(head.opts.ChunkDirRoot)
require.NoError(t, err)
snapshots := 0
for i := len(files) - 1; i >= 0; i-- {
fi := files[i]
if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) {
snapshots++
require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name())
}
}
require.Equal(t, 1, snapshots)
// Test the replay of snapshot.
head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot.
// Disabling exemplars to check that it does not hard fail replay
// https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870.
head.opts.EnableExemplarStorage = false
head.opts.MaxExemplars.Store(0)
expExemplars = expExemplars[:0]
openHeadAndCheckReplay()
require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal))
}
}
@ -2919,13 +2923,15 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot_AppenderV2(t *testing.T) {
// TestWBLReplay checks the replay at a low level.
func TestWBLReplay_AppenderV2(t *testing.T) {
for name, scenario := range sampleTypeScenarios {
t.Run(name, func(t *testing.T) {
testWBLReplayAppenderV2(t, scenario)
})
for _, enableSTstorage := range []bool{false, true} {
t.Run(fmt.Sprintf("%s/st-storage=%v", name, enableSTstorage), func(t *testing.T) {
testWBLReplayAppenderV2(t, scenario, enableSTstorage)
})
}
}
}
func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario, enableSTstorage bool) {
dir := t.TempDir()
wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, compression.Snappy)
require.NoError(t, err)
@ -2936,6 +2942,8 @@ func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
opts.ChunkRange = 1000
opts.ChunkDirRoot = dir
opts.OutOfOrderTimeWindow.Store(30 * time.Minute.Milliseconds())
opts.EnableSTStorage.Store(enableSTstorage)
opts.EnableXOR2Encoding.Store(enableSTstorage)
h, err := NewHead(nil, nil, wal, oooWlog, opts, nil)
require.NoError(t, err)
@ -2987,7 +2995,7 @@ func testWBLReplayAppenderV2(t *testing.T, scenario sampleTypeScenario) {
require.False(t, ok)
require.NotNil(t, ms)
chks, err := ms.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chks, err := ms.ooo.oooHeadChunk.chunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, h.opts.EnableXOR2Encoding.Load())
require.NoError(t, err)
require.Len(t, chks, 1)
@ -4748,3 +4756,135 @@ func TestHeadAppenderV2_Append_HistogramStalenessConversionMetrics(t *testing.T)
})
}
}
// TestHeadAppenderV2_STStorage verifies that when EnableSTStorage is true,
// start timestamps are properly stored in chunks and returned by queries.
// This test uses AppenderV2 which has native ST support.
func TestHeadAppenderV2_STStorage(t *testing.T) {
	testHistogram := tsdbutil.GenerateTestHistogram(1)
	testHistogram.CounterResetHint = histogram.NotCounterReset

	// sampleData describes one appended sample: its start timestamp,
	// timestamp, and either a float value or an integer histogram.
	type sampleData struct {
		st      int64
		ts      int64
		fSample float64
		h       *histogram.Histogram
	}

	testCases := []struct {
		name        string
		samples     []sampleData
		expectedSTs []int64
		isHistogram bool
	}{
		{
			name: "Float samples with ST",
			samples: []sampleData{
				{st: 10, ts: 100, fSample: 1.0},
				{st: 20, ts: 200, fSample: 2.0},
				{st: 30, ts: 300, fSample: 3.0},
			},
			expectedSTs: []int64{10, 20, 30},
			isHistogram: false,
		},
		{
			name: "Float samples with varying ST",
			samples: []sampleData{
				{st: 5, ts: 100, fSample: 1.0},
				{st: 5, ts: 200, fSample: 2.0},
				{st: 150, ts: 300, fSample: 3.0},
			},
			expectedSTs: []int64{5, 5, 150},
			isHistogram: false,
		},
		{
			name: "Histogram samples",
			samples: []sampleData{
				{st: 10, ts: 100, h: testHistogram},
				{st: 20, ts: 200, h: testHistogram},
				{st: 30, ts: 300, h: testHistogram},
			},
			// Histograms don't support ST storage yet, should return 0.
			expectedSTs: []int64{0, 0, 0},
			isHistogram: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			opts := newTestHeadDefaultOptions(DefaultBlockDuration, false)
			opts.EnableSTStorage.Store(true)
			opts.EnableXOR2Encoding.Store(true)
			h, _ := newTestHeadWithOptions(t, compression.None, opts)

			lbls := labels.FromStrings("foo", "bar")

			// Append all samples for this case and commit them.
			app := h.AppenderV2(context.Background())
			for _, s := range tc.samples {
				_, err := app.Append(0, lbls, s.st, s.ts, s.fSample, s.h, nil, storage.AOptions{})
				require.NoError(t, err)
			}
			require.NoError(t, app.Commit())

			// Read the chunks back directly and collect the stored STs.
			ctx := context.Background()
			ir, err := h.Index()
			require.NoError(t, err)
			defer ir.Close()
			cr, err := h.Chunks()
			require.NoError(t, err)
			defer cr.Close()

			postings, err := ir.Postings(ctx, "foo", "bar")
			require.NoError(t, err)

			var builder labels.ScratchBuilder
			require.True(t, postings.Next())
			seriesRef := postings.At()

			var metas []chunks.Meta
			require.NoError(t, ir.Series(seriesRef, &builder, &metas))

			var gotSTs []int64
			for _, m := range metas {
				chk, iterable, err := cr.ChunkOrIterable(m)
				require.NoError(t, err)
				require.Nil(t, iterable)
				it := chk.Iterator(nil)
				for it.Next() != chunkenc.ValNone {
					gotSTs = append(gotSTs, it.AtST())
				}
				require.NoError(t, it.Err())
			}
			msg := "Float samples should have ST stored"
			if tc.isHistogram {
				msg = "Histogram samples should return 0 for ST"
			}
			require.Equal(t, tc.expectedSTs, gotSTs, msg)

			// Also verify via querier.
			q, err := NewBlockQuerier(h, math.MinInt64, math.MaxInt64)
			require.NoError(t, err)
			defer q.Close()

			sset := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.True(t, sset.Next())
			srs := sset.At()
			require.NoError(t, sset.Err())

			sit := srs.Iterator(nil)
			var querySTs []int64
			for sit.Next() != chunkenc.ValNone {
				querySTs = append(querySTs, sit.AtST())
			}
			require.NoError(t, sit.Err())
			require.Equal(t, tc.expectedSTs, querySTs, "Querier should return same ST values as chunk iterator")
		})
	}
}

View File

@ -33,7 +33,7 @@ func TestMemSeries_chunk(t *testing.T) {
appendSamples := func(t *testing.T, s *memSeries, start, end int64, cdm *chunks.ChunkDiskMapper) {
for i := start; i < end; i += chunkStep {
ok, _ := s.append(i, float64(i), 0, chunkOpts{
ok, _ := s.append(0, i, float64(i), 0, chunkOpts{
chunkDiskMapper: cdm,
chunkRange: chunkRange,
samplesPerChunk: DefaultSamplesPerChunk,

File diff suppressed because it is too large Load Diff

View File

@ -169,7 +169,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
return
}
decoded <- series
case record.Samples:
case record.Samples, record.SamplesV2:
samples := h.wlReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(r.Record(), samples)
if err != nil {
@ -646,6 +646,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: h.opts.EnableXOR2Encoding.Load(),
}
for in := range wp.input {
@ -676,7 +677,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
h.numStaleSeries.Dec()
}
if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated {
if _, chunkCreated := ms.append(s.ST, s.T, s.V, 0, appendChunkOpts); chunkCreated {
h.metrics.chunksCreated.Inc()
h.metrics.chunks.Inc()
_ = ms.mmapChunks(h.chunkDiskMapper)
@ -713,14 +714,16 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
newlyStale = newlyStale && !value.IsStaleNaN(ms.lastHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(ms.lastHistogramValue.Sum) && !value.IsStaleNaN(s.h.Sum)
}
_, chunkCreated = ms.appendHistogram(s.t, s.h, 0, appendChunkOpts)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
_, chunkCreated = ms.appendHistogram(0, s.t, s.h, 0, appendChunkOpts)
} else {
newlyStale = value.IsStaleNaN(s.fh.Sum)
if ms.lastFloatHistogramValue != nil {
newlyStale = newlyStale && !value.IsStaleNaN(ms.lastFloatHistogramValue.Sum)
staleToNonStale = value.IsStaleNaN(ms.lastFloatHistogramValue.Sum) && !value.IsStaleNaN(s.fh.Sum)
}
_, chunkCreated = ms.appendFloatHistogram(s.t, s.fh, 0, appendChunkOpts)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
_, chunkCreated = ms.appendFloatHistogram(0, s.t, s.fh, 0, appendChunkOpts)
}
if newlyStale {
h.numStaleSeries.Inc()
@ -809,7 +812,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
var err error
rec := r.Record()
switch dec.Type(rec) {
case record.Samples:
case record.Samples, record.SamplesV2:
samples := h.wlReplaySamplesPool.Get()[:0]
samples, err = dec.Samples(rec, samples)
if err != nil {
@ -1090,6 +1093,12 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
var unknownSampleRefs, unknownHistogramRefs uint64
oooCapMax := h.opts.OutOfOrderCapMax.Load()
appendChunkOpts := chunkOpts{
chunkDiskMapper: h.chunkDiskMapper,
chunkRange: h.chunkRange.Load(),
samplesPerChunk: h.opts.SamplesPerChunk,
useXOR2: h.opts.EnableXOR2Encoding.Load(),
}
// We don't check for minValidTime for ooo samples.
mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
for in := range wp.input {
@ -1109,7 +1118,7 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
missingSeries[s.Ref] = struct{}{}
continue
}
ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger)
ok, chunkCreated, _ := ms.insert(s.ST, s.T, s.V, nil, nil, appendChunkOpts, oooCapMax, h.logger)
if chunkCreated {
h.metrics.chunksCreated.Inc()
h.metrics.chunks.Inc()
@ -1137,9 +1146,11 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesR
var chunkCreated bool
var ok bool
if s.h != nil {
ok, chunkCreated, _ = ms.insert(s.t, 0, s.h, nil, h.chunkDiskMapper, oooCapMax, h.logger)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
ok, chunkCreated, _ = ms.insert(0, s.t, 0, s.h, nil, appendChunkOpts, oooCapMax, h.logger)
} else {
ok, chunkCreated, _ = ms.insert(s.t, 0, nil, s.fh, h.chunkDiskMapper, oooCapMax, h.logger)
// TODO(krajorama,ywwg): Pass ST when available in WBL.
ok, chunkCreated, _ = ms.insert(0, s.t, 0, nil, s.fh, appendChunkOpts, oooCapMax, h.logger)
}
if chunkCreated {
h.metrics.chunksCreated.Inc()
@ -1253,7 +1264,7 @@ func decodeSeriesFromChunkSnapshot(d *record.Decoder, b []byte) (csr chunkSnapsh
csr.mc.chunk = chk
switch enc {
case chunkenc.EncXOR:
case chunkenc.EncXOR, chunkenc.EncXOR2:
// Backwards-compatibility for old sampleBuf which had last 4 samples.
for range 3 {
_ = dec.Be64int64()
@ -1413,7 +1424,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) {
// Assuming 100 bytes (overestimate) per exemplar, that's ~1MB.
maxExemplarsPerRecord := 10000
batch := make([]record.RefExemplar, 0, maxExemplarsPerRecord)
enc := record.Encoder{}
enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage.Load()}
flushExemplars := func() error {
if len(batch) == 0 {
return nil

View File

@ -34,14 +34,13 @@ func NewOOOChunk() *OOOChunk {
// Insert inserts the sample such that order is maintained.
// Returns false if insert was not possible due to the same timestamp already existing.
func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram) bool {
func (o *OOOChunk) Insert(st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram) bool {
// Although out-of-order samples can be out-of-order amongst themselves, we
// are opinionated and expect them to be usually in-order meaning we could
// try to append at the end first if the new timestamp is higher than the
// last known timestamp.
if len(o.samples) == 0 || t > o.samples[len(o.samples)-1].t {
// TODO(krajorama): pass ST.
o.samples = append(o.samples, sample{0, t, v, h, fh})
o.samples = append(o.samples, sample{st, t, v, h, fh})
return true
}
@ -50,8 +49,7 @@ func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histog
if i >= len(o.samples) {
// none found. append it at the end
// TODO(krajorama): pass ST.
o.samples = append(o.samples, sample{0, t, v, h, fh})
o.samples = append(o.samples, sample{st, t, v, h, fh})
return true
}
@ -63,8 +61,7 @@ func (o *OOOChunk) Insert(t int64, v float64, h *histogram.Histogram, fh *histog
// Expand length by 1 to make room. use a zero sample, we will overwrite it anyway.
o.samples = append(o.samples, sample{})
copy(o.samples[i+1:], o.samples[i:])
// TODO(krajorama): pass ST.
o.samples[i] = sample{0, t, v, h, fh}
o.samples[i] = sample{st, t, v, h, fh}
return true
}
@ -76,7 +73,7 @@ func (o *OOOChunk) NumSamples() int {
// ToEncodedChunks returns chunks with the samples in the OOOChunk.
//
//nolint:revive
func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error) {
func (o *OOOChunk) ToEncodedChunks(mint, maxt int64, useXOR2 bool) (chks []memChunk, err error) {
if len(o.samples) == 0 {
return nil, nil
}
@ -96,10 +93,13 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
if s.t > maxt {
break
}
encoding := chunkenc.EncXOR
if s.h != nil {
encoding := chunkenc.ValFloat.ChunkEncoding(useXOR2)
switch {
case s.h != nil:
// TODO(krajorama): use ST capable histogram chunk.
encoding = chunkenc.EncHistogram
} else if s.fh != nil {
case s.fh != nil:
// TODO(krajorama): use ST capable float histogram chunk.
encoding = chunkenc.EncFloatHistogram
}
@ -111,15 +111,11 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})
}
cmint = s.t
switch encoding {
case chunkenc.EncXOR:
chunk = chunkenc.NewXORChunk()
case chunkenc.EncHistogram:
chunk = chunkenc.NewHistogramChunk()
case chunkenc.EncFloatHistogram:
chunk = chunkenc.NewFloatHistogramChunk()
default:
chunk = chunkenc.NewXORChunk()
chunk, err = chunkenc.NewEmptyChunk(encoding)
if err != nil {
// This should never happen. No point using a default type as
// calling the wrong append function would panic.
return chks, err
}
app, err = chunk.Appender()
if err != nil {
@ -127,18 +123,17 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
}
}
switch encoding {
case chunkenc.EncXOR:
// TODO(krajorama): pass ST.
app.Append(0, s.t, s.f)
case chunkenc.EncXOR, chunkenc.EncXOR2:
app.Append(s.st, s.t, s.f)
case chunkenc.EncHistogram:
// TODO(krajorama): handle ST capable histogram chunk.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevHApp, _ := prevApp.(*chunkenc.HistogramAppender)
var (
newChunk chunkenc.Chunk
recoded bool
)
// TODO(krajorama): pass ST.
newChunk, recoded, app, _ = app.AppendHistogram(prevHApp, 0, s.t, s.h, false)
newChunk, recoded, app, _ = app.AppendHistogram(prevHApp, s.st, s.t, s.h, false)
if newChunk != nil { // A new chunk was allocated.
if !recoded {
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})
@ -147,14 +142,14 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
chunk = newChunk
}
case chunkenc.EncFloatHistogram:
// TODO(krajorama): handle ST capable float histogram chunk.
// Ignoring ok is ok, since we don't want to compare to the wrong previous appender anyway.
prevHApp, _ := prevApp.(*chunkenc.FloatHistogramAppender)
var (
newChunk chunkenc.Chunk
recoded bool
)
// TODO(krajorama): pass ST.
newChunk, recoded, app, _ = app.AppendFloatHistogram(prevHApp, 0, s.t, s.fh, false)
newChunk, recoded, app, _ = app.AppendFloatHistogram(prevHApp, s.st, s.t, s.fh, false)
if newChunk != nil { // A new chunk was allocated.
if !recoded {
chks = append(chks, memChunk{chunk, cmint, cmaxt, nil})

View File

@ -77,7 +77,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
*chks = (*chks)[:0]
if s.ooo != nil {
return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks)
return getOOOSeriesChunks(s, oh.head.opts.EnableXOR2Encoding.Load(), oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks)
}
*chks = appendSeriesChunks(s, oh.inoMint, oh.maxt, *chks)
return nil
@ -88,7 +88,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
//
// maxMmapRef tells upto what max m-map chunk that we can consider. If it is non-0, then
// the oooHeadChunk will not be considered.
func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error {
func getOOOSeriesChunks(s *memSeries, useXOR2 bool, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error {
tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks))
addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) {
@ -106,7 +106,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
if c.OverlapsClosedInterval(mint, maxt) && maxMmapRef == 0 {
ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.ooo.oooMmappedChunks))))
if len(c.chunk.samples) > 0 { // Empty samples happens in tests, at least.
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(c.minTime, c.maxTime)
chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(c.minTime, c.maxTime, useXOR2)
if err != nil {
handleChunkWriteError(err)
return nil
@ -347,7 +347,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead,
}
var lastMmapRef chunks.ChunkDiskMapperRef
mmapRefs := ms.mmapCurrentOOOHeadChunk(head.chunkDiskMapper, head.logger)
mmapRefs := ms.mmapCurrentOOOHeadChunk(chunkOpts{chunkDiskMapper: head.chunkDiskMapper, useXOR2: head.opts.EnableXOR2Encoding.Load()}, head.logger)
if len(mmapRefs) == 0 && len(ms.ooo.oooMmappedChunks) > 0 {
// Nothing was m-mapped. So take the mmapRef from the existing slice if it exists.
mmapRefs = []chunks.ChunkDiskMapperRef{ms.ooo.oooMmappedChunks[len(ms.ooo.oooMmappedChunks)-1].ref}
@ -481,7 +481,7 @@ func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *l
return nil
}
return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks)
return getOOOSeriesChunks(s, ir.ch.head.opts.EnableXOR2Encoding.Load(), ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks)
}
func (*OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, _ string, _ *storage.LabelHints, _ ...*labels.Matcher) ([]string, error) {

View File

@ -31,10 +31,11 @@ const testMaxSize int = 32
func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values
func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals.
func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample {
func makeEvenSampleSlice(n int, sampleFunc func(st, ts int64) sample) []sample {
s := make([]sample, n)
for i := range n {
s[i] = sampleFunc(valEven(i))
ts := valEven(i)
s[i] = sampleFunc(ts, ts) // Use ts as st for consistency
}
return s
}
@ -43,23 +44,50 @@ func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample {
// - Number of pre-existing samples anywhere from 0 to testMaxSize-1.
// - Insert new sample before first pre-existing samples, after the last, and anywhere in between.
// - With a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves.
// - With st=0 and st!=0 to verify ordering is based on sample.t, not sample.st.
func TestOOOInsert(t *testing.T) {
scenarios := map[string]struct {
sampleFunc func(ts int64) sample
sampleFunc func(st, ts int64) sample
}{
"float": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, f: float64(ts)}
"float st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, f: float64(ts)}
},
},
"integer histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
"float st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, f: float64(ts)}
},
},
"float histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
"float st=ts-100": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts - 100, t: ts, f: float64(ts)}
},
},
"float st descending while t ascending": {
// st values go in opposite direction of t to ensure ordering is by t.
sampleFunc: func(st, ts int64) sample {
return sample{st: 1000 - ts, t: ts, f: float64(ts)}
},
},
"integer histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"integer histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"float histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
"float histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
}
@ -71,7 +99,7 @@ func TestOOOInsert(t *testing.T) {
}
func testOOOInsert(t *testing.T,
sampleFunc func(ts int64) sample,
sampleFunc func(st, ts int64) sample,
) {
for numPreExisting := 0; numPreExisting <= testMaxSize; numPreExisting++ {
// For example, if we have numPreExisting 2, then:
@ -84,19 +112,22 @@ func testOOOInsert(t *testing.T,
chunk := NewOOOChunk()
chunk.samples = make([]sample, numPreExisting)
chunk.samples = makeEvenSampleSlice(numPreExisting, sampleFunc)
newSample := sampleFunc(valOdd(insertPos))
chunk.Insert(newSample.t, newSample.f, newSample.h, newSample.fh)
ts := valOdd(insertPos)
newSample := sampleFunc(ts, ts) // Use ts as st for consistency
chunk.Insert(newSample.st, newSample.t, newSample.f, newSample.h, newSample.fh)
var expSamples []sample
// Our expected new samples slice, will be first the original samples.
for i := 0; i < insertPos; i++ {
expSamples = append(expSamples, sampleFunc(valEven(i)))
ts := valEven(i)
expSamples = append(expSamples, sampleFunc(ts, ts))
}
// Then the new sample.
expSamples = append(expSamples, newSample)
// Followed by any original samples that were pushed back by the new one.
for i := insertPos; i < numPreExisting; i++ {
expSamples = append(expSamples, sampleFunc(valEven(i)))
ts := valEven(i)
expSamples = append(expSamples, sampleFunc(ts, ts))
}
require.Equal(t, expSamples, chunk.samples, "numPreExisting %d, insertPos %d", numPreExisting, insertPos)
@ -107,23 +138,50 @@ func testOOOInsert(t *testing.T,
// TestOOOInsertDuplicate tests the correct behavior when inserting a sample that is a duplicate of any
// pre-existing samples, with between 1 and testMaxSize pre-existing samples and
// with a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves.
// With st=0 and st!=0 to verify duplicate detection is based on sample.t, not sample.st.
func TestOOOInsertDuplicate(t *testing.T) {
scenarios := map[string]struct {
sampleFunc func(ts int64) sample
sampleFunc func(st, ts int64) sample
}{
"float": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, f: float64(ts)}
"float st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, f: float64(ts)}
},
},
"integer histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
"float st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, f: float64(ts)}
},
},
"float histogram": {
sampleFunc: func(ts int64) sample {
return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
"float st=ts-100": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts - 100, t: ts, f: float64(ts)}
},
},
"float st descending while t ascending": {
// st values go in opposite direction of t to ensure duplicate detection is by t.
sampleFunc: func(st, ts int64) sample {
return sample{st: 1000 - ts, t: ts, f: float64(ts)}
},
},
"integer histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"integer histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, h: tsdbutil.GenerateTestHistogram(ts)}
},
},
"float histogram st=0": {
sampleFunc: func(st, ts int64) sample {
return sample{st: 0, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
"float histogram st=ts": {
sampleFunc: func(st, ts int64) sample {
return sample{st: ts, t: ts, fh: tsdbutil.GenerateTestFloatHistogram(ts)}
},
},
}
@ -135,7 +193,7 @@ func TestOOOInsertDuplicate(t *testing.T) {
}
func testOOOInsertDuplicate(t *testing.T,
sampleFunc func(ts int64) sample,
sampleFunc func(st, ts int64) sample,
) {
for num := 1; num <= testMaxSize; num++ {
for dupPos := 0; dupPos < num; dupPos++ {
@ -145,7 +203,7 @@ func testOOOInsertDuplicate(t *testing.T,
dupSample := chunk.samples[dupPos]
dupSample.f = 0.123
ok := chunk.Insert(dupSample.t, dupSample.f, dupSample.h, dupSample.fh)
ok := chunk.Insert(dupSample.st, dupSample.t, dupSample.f, dupSample.h, dupSample.fh)
expSamples := makeEvenSampleSlice(num, sampleFunc) // We expect no change.
require.False(t, ok)
@ -252,17 +310,17 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) {
for _, s := range tc.samples {
switch s.Type() {
case chunkenc.ValFloat:
oooChunk.Insert(s.t, s.f, nil, nil)
oooChunk.Insert(s.st, s.t, s.f, nil, nil)
case chunkenc.ValHistogram:
oooChunk.Insert(s.t, 0, s.h.Copy(), nil)
oooChunk.Insert(s.st, s.t, 0, s.h.Copy(), nil)
case chunkenc.ValFloatHistogram:
oooChunk.Insert(s.t, 0, nil, s.fh.Copy())
oooChunk.Insert(s.st, s.t, 0, nil, s.fh.Copy())
default:
t.Fatalf("unexpected sample type %d", s.Type())
}
}
chunks, err := oooChunk.ToEncodedChunks(math.MinInt64, math.MaxInt64)
chunks, err := oooChunk.ToEncodedChunks(math.MinInt64, math.MaxInt64, false)
require.NoError(t, err)
require.Len(t, chunks, len(tc.expectedChunks), "number of chunks")
sampleIndex := 0
@ -308,3 +366,87 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) {
})
}
}
// TestOOOChunks_ToEncodedChunks_WithST tests ToEncodedChunks with useXOR2=true and useXOR2=false for float samples.
// When useXOR2=true, st values are preserved; when useXOR2=false, AtST() returns 0.
// TODO(@krajorama): Add histogram test cases once ST storage is implemented for histograms.
func TestOOOChunks_ToEncodedChunks_WithST(t *testing.T) {
	testCases := map[string]struct {
		samples []sample
	}{
		"floats with st=0": {
			samples: []sample{
				{st: 0, t: 1000, f: 43.0},
				{st: 0, t: 1100, f: 42.0},
			},
		},
		"floats with st=t": {
			samples: []sample{
				{st: 1000, t: 1000, f: 43.0},
				{st: 1100, t: 1100, f: 42.0},
			},
		},
		"floats with st=t-100": {
			samples: []sample{
				{st: 900, t: 1000, f: 43.0},
				{st: 1000, t: 1100, f: 42.0},
			},
		},
		"floats with varying st": {
			samples: []sample{
				{st: 500, t: 1000, f: 43.0},
				{st: 1100, t: 1100, f: 42.0}, // st == t
				{st: 0, t: 1200, f: 41.0},    // st == 0
			},
		},
	}
	storageScenarios := []struct {
		name             string
		useXOR2          bool
		expectedEncoding chunkenc.Encoding
	}{
		{"useXOR2=true", true, chunkenc.EncXOR2},
		{"useXOR2=false", false, chunkenc.EncXOR},
	}
	for name, tc := range testCases {
		for _, sc := range storageScenarios {
			t.Run(name+"/"+sc.name, func(t *testing.T) {
				// Build the OOO chunk from the raw samples.
				var ooo OOOChunk
				for _, s := range tc.samples {
					ooo.Insert(s.st, s.t, s.f, nil, nil)
				}

				encoded, err := ooo.ToEncodedChunks(math.MinInt64, math.MaxInt64, sc.useXOR2)
				require.NoError(t, err)
				require.Len(t, encoded, 1, "number of chunks")

				got := encoded[0]
				require.Equal(t, sc.expectedEncoding, got.chunk.Encoding(), "chunk encoding")
				require.Equal(t, tc.samples[0].t, got.minTime, "chunk minTime")
				require.Equal(t, tc.samples[len(tc.samples)-1].t, got.maxTime, "chunk maxTime")

				// Read every sample back and check that st, t and f round-trip.
				it := got.chunk.Iterator(nil)
				i := 0
				for it.Next() == chunkenc.ValFloat {
					gotT, gotF := it.At()
					if sc.useXOR2 {
						// XOR2 chunks retain the start timestamp.
						require.Equal(t, tc.samples[i].st, it.AtST(), "sample %d st", i)
					} else {
						// Plain XOR chunks have no ST storage.
						require.Equal(t, int64(0), it.AtST(), "sample %d st should be 0 when useXOR2=false", i)
					}
					require.Equal(t, tc.samples[i].t, gotT, "sample %d t", i)
					require.Equal(t, tc.samples[i].f, gotF, "sample %d f", i)
					i++
				}
				require.Equal(t, len(tc.samples), i, "number of samples")
			})
		}
	}
}

View File

@ -866,7 +866,6 @@ func (p *populateWithDelChunkSeriesIterator) Next() bool {
// populateCurrForSingleChunk sets the fields within p.currMetaWithChunk. This
// should be called if the samples in p.currDelIter only form one chunk.
// TODO(krajorama): test ST when chunks support it.
func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
valueType := p.currDelIter.Next()
if valueType == chunkenc.ValNone {
@ -885,60 +884,47 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
st, t int64
err error
)
switch valueType {
case chunkenc.ValHistogram:
newChunk = chunkenc.NewHistogramChunk()
if app, err = newChunk.Appender(); err != nil {
newChunk, err = chunkenc.NewEmptyChunk(p.currMeta.Chunk.Encoding())
if err != nil {
p.err = fmt.Errorf("create new chunk while re-encoding: %w", err)
return false
}
app, err = newChunk.Appender()
if err != nil {
p.err = fmt.Errorf("create appender while re-encoding: %w", err)
return false
}
loop:
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != valueType {
err = fmt.Errorf("found value type %v in chunk with %v", vt, valueType)
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValHistogram {
err = fmt.Errorf("found value type %v in histogram chunk", vt)
break
}
var h *histogram.Histogram
t, h = p.currDelIter.AtHistogram(nil)
st = p.currDelIter.AtST()
_, _, app, err = app.AppendHistogram(nil, st, t, h, true)
if err != nil {
break
}
}
case chunkenc.ValFloat:
newChunk = chunkenc.NewXORChunk()
if app, err = newChunk.Appender(); err != nil {
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValFloat {
err = fmt.Errorf("found value type %v in float chunk", vt)
break
}
st = p.currDelIter.AtST()
switch vt {
case chunkenc.ValFloat:
var v float64
t, v = p.currDelIter.At()
st = p.currDelIter.AtST()
app.Append(st, t, v)
}
case chunkenc.ValFloatHistogram:
newChunk = chunkenc.NewFloatHistogramChunk()
if app, err = newChunk.Appender(); err != nil {
break
}
for vt := valueType; vt != chunkenc.ValNone; vt = p.currDelIter.Next() {
if vt != chunkenc.ValFloatHistogram {
err = fmt.Errorf("found value type %v in histogram chunk", vt)
break
case chunkenc.ValHistogram:
var h *histogram.Histogram
t, h = p.currDelIter.AtHistogram(nil)
_, _, app, err = app.AppendHistogram(nil, st, t, h, true)
if err != nil {
break loop
}
case chunkenc.ValFloatHistogram:
var h *histogram.FloatHistogram
t, h = p.currDelIter.AtFloatHistogram(nil)
st = p.currDelIter.AtST()
_, _, app, err = app.AppendFloatHistogram(nil, st, t, h, true)
if err != nil {
break
break loop
}
default:
err = fmt.Errorf("populateCurrForSingleChunk: value type %v unsupported", valueType)
break loop
}
default:
err = fmt.Errorf("populateCurrForSingleChunk: value type %v unsupported", valueType)
}
if err != nil {
@ -958,7 +944,6 @@ func (p *populateWithDelChunkSeriesIterator) populateCurrForSingleChunk() bool {
// populateChunksFromIterable reads the samples from currDelIter to create
// chunks for chunksFromIterable. It also sets p.currMetaWithChunk to the first
// chunk.
// TODO(krajorama): test ST when chunks support it.
func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
p.chunksFromIterable = p.chunksFromIterable[:0]
p.chunksFromIterableIdx = -1
@ -982,30 +967,37 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
app chunkenc.Appender
newChunk chunkenc.Chunk
recoded bool
err error
)
prevValueType := chunkenc.ValNone
hasTS := false
for currentValueType := firstValueType; currentValueType != chunkenc.ValNone; currentValueType = p.currDelIter.Next() {
var (
newChunk chunkenc.Chunk
recoded bool
)
// Check if the encoding has changed (i.e. we need to create a new
// chunk as chunks can't have multiple encoding types).
// For the first sample, the following condition will always be true as
// ValNone != ValFloat | ValHistogram | ValFloatHistogram.
if currentValueType != prevValueType {
// Also if we need to store start time (ST), but the current chunk is
// not capable.
st = p.currDelIter.AtST()
needTS := st != 0
if currentValueType != prevValueType || !hasTS && needTS {
if prevValueType != chunkenc.ValNone {
p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt})
}
cmint = p.currDelIter.AtT()
if currentChunk, err = currentValueType.NewChunk(); err != nil {
if currentChunk, err = currentValueType.NewChunk(needTS); err != nil {
break
}
if app, err = currentChunk.Appender(); err != nil {
break
}
hasTS = needTS
}
switch currentValueType {
@ -1013,14 +1005,12 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
{
var v float64
t, v = p.currDelIter.At()
st = p.currDelIter.AtST()
app.Append(st, t, v)
}
case chunkenc.ValHistogram:
{
var v *histogram.Histogram
t, v = p.currDelIter.AtHistogram(nil)
st = p.currDelIter.AtST()
// No need to set prevApp as AppendHistogram will set the
// counter reset header for the appender that's returned.
newChunk, recoded, app, err = app.AppendHistogram(nil, st, t, v, false)
@ -1029,7 +1019,6 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
{
var v *histogram.FloatHistogram
t, v = p.currDelIter.AtFloatHistogram(nil)
st = p.currDelIter.AtST()
// No need to set prevApp as AppendHistogram will set the
// counter reset header for the appender that's returned.
newChunk, recoded, app, err = app.AppendFloatHistogram(nil, st, t, v, false)

View File

@ -2025,6 +2025,207 @@ func TestPopulateWithDelSeriesIterator_NextWithMinTime(t *testing.T) {
}
}
// TestPopulateWithDelSeriesIterator_WithST tests that ST (start time) values are
// correctly preserved when iterating through chunks with ST support.
func TestPopulateWithDelSeriesIterator_WithST(t *testing.T) {
// Samples with non-zero ST values to test ST preservation.
samplesWithST := [][]chunks.Sample{
{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
},
}
// Samples with varying ST patterns.
samplesVaryingST := [][]chunks.Sample{
{
sample{st: 0, t: 1000, f: 1.0}, // st=0
sample{st: 1500, t: 1500, f: 1.5}, // st=t
sample{st: 1900, t: 2000, f: 2.0}, // st=t-100
sample{st: 500, t: 3000, f: 3.0}, // st < t
},
}
cases := []struct {
name string
samples [][]chunks.Sample
expected []chunks.Sample
}{
{
name: "all samples have non-zero ST",
samples: samplesWithST,
expected: []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
},
},
{
name: "samples with varying ST patterns",
samples: samplesVaryingST,
expected: []chunks.Sample{
sample{st: 0, t: 1000, f: 1.0},
sample{st: 1500, t: 1500, f: 1.5},
sample{st: 1900, t: 2000, f: 2.0},
sample{st: 500, t: 3000, f: 3.0},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// Test with chunks (not iterables).
t.Run("chunks", func(t *testing.T) {
f, chkMetas := createFakeReaderAndNotPopulatedChunks(tc.samples...)
it := &populateWithDelSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, nil)
var result []chunks.Sample
for it.Next() != chunkenc.ValNone {
st := it.AtST()
ts, v := it.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
// Test with iterables.
t.Run("iterables", func(t *testing.T) {
f, chkMetas := createFakeReaderAndIterables(tc.samples...)
it := &populateWithDelSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, nil)
var result []chunks.Sample
for it.Next() != chunkenc.ValNone {
st := it.AtST()
ts, v := it.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
})
}
}
// TestPopulateWithDelChunkSeriesIterator_WithST tests that ST (start time) values are
// correctly preserved when re-encoding chunks with deletions.
func TestPopulateWithDelChunkSeriesIterator_WithST(t *testing.T) {
samplesWithST := []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
}
samplesWithNoLeadingST := []chunks.Sample{
sample{st: 0, t: 1000, f: 1.0},
sample{st: 0, t: 2000, f: 2.0},
sample{st: 0, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
}
cases := []struct {
name string
samples [][]chunks.Sample
intervals tombstones.Intervals
expected []chunks.Sample
}{
{
name: "no deletions - ST preserved",
samples: [][]chunks.Sample{samplesWithST},
intervals: nil,
expected: samplesWithST,
},
{
name: "with deletions - ST preserved in remaining samples",
samples: [][]chunks.Sample{samplesWithST},
// Delete samples at t=2000 and t=4000.
intervals: tombstones.Intervals{{Mint: 2000, Maxt: 2000}, {Mint: 4000, Maxt: 4000}},
expected: []chunks.Sample{
sample{st: 100, t: 1000, f: 1.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
{
name: "delete first sample - ST preserved",
samples: [][]chunks.Sample{samplesWithST},
// Delete first sample.
intervals: tombstones.Intervals{{Mint: 1000, Maxt: 1000}},
expected: []chunks.Sample{
sample{st: 200, t: 2000, f: 2.0},
sample{st: 300, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
{
// This tests that populateCurrForSingleChunk can handle
// chunks that don't start with ST, but introduce ST later.
name: "delete first sample - ST late preserved",
samples: [][]chunks.Sample{samplesWithNoLeadingST},
// Delete first sample.
intervals: tombstones.Intervals{{Mint: 1000, Maxt: 1000}},
expected: []chunks.Sample{
sample{st: 0, t: 2000, f: 2.0},
sample{st: 0, t: 3000, f: 3.0},
sample{st: 400, t: 4000, f: 4.0},
sample{st: 500, t: 5000, f: 5.0},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// Test with chunks that need re-encoding due to deletions.
t.Run("chunks", func(t *testing.T) {
f, chkMetas := createFakeReaderAndNotPopulatedChunks(tc.samples...)
it := &populateWithDelChunkSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, tc.intervals)
var result []chunks.Sample
for it.Next() {
meta := it.At()
chkIt := meta.Chunk.Iterator(nil)
for chkIt.Next() != chunkenc.ValNone {
st := chkIt.AtST()
ts, v := chkIt.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, chkIt.Err())
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
// Test with iterables.
t.Run("iterables", func(t *testing.T) {
f, chkMetas := createFakeReaderAndIterables(tc.samples...)
it := &populateWithDelChunkSeriesIterator{}
it.reset(ulid.ULID{}, f, chkMetas, tc.intervals)
var result []chunks.Sample
for it.Next() {
meta := it.At()
chkIt := meta.Chunk.Iterator(nil)
for chkIt.Next() != chunkenc.ValNone {
st := chkIt.AtST()
ts, v := chkIt.At()
result = append(result, sample{st: st, t: ts, f: v})
}
require.NoError(t, chkIt.Err())
}
require.NoError(t, it.Err())
require.Equal(t, tc.expected, result)
})
})
}
}
// Test the cost of merging series sets for different number of merged sets and their size.
// The subset are all equivalent so this does not capture merging of partial or non-overlapping sets well.
// TODO(bwplotka): Merge with storage merged series set benchmark.

207
tsdb/record/bench_test.go Normal file
View File

@ -0,0 +1,207 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package record_test
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb/compression"
"github.com/prometheus/prometheus/tsdb/record"
"github.com/prometheus/prometheus/util/testrecord"
)
// zeroOutSTs returns a copy of samples with every start timestamp (ST) reset
// to zero; the input slice is left untouched.
func zeroOutSTs(samples []record.RefSample) []record.RefSample {
	out := make([]record.RefSample, len(samples))
	copy(out, samples)
	for i := range out {
		out[i].ST = 0
	}
	return out
}
// TestEncodeDecode round-trips generated sample records through the encoder
// and decoder, for both the V1 (ST storage off) and V2 (ST storage on)
// record formats and for all generated data distributions.
func TestEncodeDecode(t *testing.T) {
	for _, enableSTStorage := range []bool{false, true} {
		for _, tcase := range []testrecord.RefSamplesCase{
			testrecord.Realistic1000Samples,
			testrecord.Realistic1000WithVariableSTSamples,
			testrecord.Realistic1000WithConstSTSamples,
			testrecord.WorstCase1000,
			testrecord.WorstCase1000WithSTSamples,
		} {
			var (
				dec record.Decoder
				buf []byte
				enc = record.Encoder{EnableSTStorage: enableSTStorage}
			)
			s := testrecord.GenTestRefSamplesCase(t, tcase)

			// When ST storage is off the record format drops the ST field,
			// so decoding yields zeroed STs. Compute the expectation once
			// instead of repeating it per sub-case.
			expected := s
			if !enableSTStorage {
				expected = zeroOutSTs(s)
			}

			// Plain round trip.
			{
				got, err := dec.Samples(enc.Samples(s, nil), nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With byte buffer (append!)
			{
				buf = make([]byte, 10, 1e5)
				got, err := dec.Samples(enc.Samples(s, buf)[10:], nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With sample slice
			{
				samples := make([]record.RefSample, 0, len(s)+1)
				got, err := dec.Samples(enc.Samples(s, nil), samples)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}

			// With compression.
			{
				buf := enc.Samples(s, nil)
				cEnc, err := compression.NewEncoder()
				require.NoError(t, err)
				buf, _, err = cEnc.Encode(compression.Zstd, buf, nil)
				require.NoError(t, err)
				buf, err = compression.NewDecoder().Decode(compression.Zstd, buf, nil)
				require.NoError(t, err)
				got, err := dec.Samples(buf, nil)
				require.NoError(t, err)
				require.Equal(t, expected, got)
			}
		}
	}
}
var (
	// compressions are the WAL compression codecs exercised by the benchmarks below.
	compressions = []compression.Type{compression.None, compression.Snappy, compression.Zstd}
	// dataCases are the generated sample distributions used as benchmark inputs.
	dataCases = []testrecord.RefSamplesCase{
		testrecord.Realistic1000Samples,
		testrecord.Realistic1000WithVariableSTSamples,
		testrecord.Realistic1000WithConstSTSamples,
		testrecord.WorstCase1000,
		testrecord.WorstCase1000WithSTSamples,
	}
	// UseV2 selects the SamplesV2 (ST-capable) encoding for the benchmark encoders.
	UseV2 = true
)
/*
export bench=encode-v2 && go test ./tsdb/record/... \
-run '^$' -bench '^BenchmarkEncode_Samples' \
-benchtime 5s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
*/
// BenchmarkEncode_Samples measures sample-record encoding plus compression
// for every codec/data-case combination, reporting the raw and compressed
// record sizes as custom metrics.
func BenchmarkEncode_Samples(b *testing.B) {
	for _, compr := range compressions {
		for _, data := range dataCases {
			b.Run(fmt.Sprintf("compr=%v/data=%v", compr, data), func(b *testing.B) {
				var (
					samples = testrecord.GenTestRefSamplesCase(b, data)
					enc     = record.Encoder{EnableSTStorage: UseV2}
					buf     []byte
					cBuf    []byte
				)
				cEnc, err := compression.NewEncoder()
				require.NoError(b, err)

				// Warm up.
				buf = enc.Samples(samples, buf[:0])
				cBuf, _, err = cEnc.Encode(compr, buf, cBuf[:0])
				require.NoError(b, err)

				b.ReportAllocs()
				b.ResetTimer()
				for b.Loop() {
					buf = enc.Samples(samples, buf[:0])
					cBuf, _, _ = cEnc.Encode(compr, buf, cBuf[:0])
				}
				// The record sizes are invariant across iterations; report
				// them once after the timed loop so the metric bookkeeping
				// does not pollute the measurement.
				b.ReportMetric(float64(len(buf)), "B/rec")
				b.ReportMetric(float64(len(cBuf)), "B/compressed-rec")
			})
		}
	}
}
/*
export bench=decode-v2 && go test ./tsdb/record/... \
-run '^$' -bench '^BenchmarkDecode_Samples' \
-benchtime 5s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
*/
// BenchmarkDecode_Samples measures decompression plus sample-record decoding
// for every codec/data-case combination.
func BenchmarkDecode_Samples(b *testing.B) {
	for _, codec := range compressions {
		for _, tcase := range dataCases {
			b.Run(fmt.Sprintf("compr=%v/data=%v", codec, tcase), func(b *testing.B) {
				var (
					in         = testrecord.GenTestRefSamplesCase(b, tcase)
					enc        = record.Encoder{EnableSTStorage: UseV2}
					dec        record.Decoder
					cDec       = compression.NewDecoder()
					cBuf       []byte
					samplesBuf []record.RefSample
				)
				// Prepare the compressed input once, outside the timed loop.
				rec := enc.Samples(in, nil)
				cEnc, err := compression.NewEncoder()
				require.NoError(b, err)
				rec, _, err = cEnc.Encode(codec, rec, nil)
				require.NoError(b, err)

				// Warm up the reusable buffers and verify the round trip works.
				cBuf, err = cDec.Decode(codec, rec, cBuf[:0])
				require.NoError(b, err)
				samplesBuf, err = dec.Samples(cBuf, samplesBuf[:0])
				require.NoError(b, err)

				b.ReportAllocs()
				b.ResetTimer()
				for b.Loop() {
					// Errors were checked during warm-up; ignore them here to
					// keep the hot loop free of extra branches.
					cBuf, _ = cDec.Decode(codec, rec, cBuf[:0])
					samplesBuf, _ = dec.Samples(cBuf, samplesBuf[:0])
				}
			})
		}
	}
}

View File

@ -58,6 +58,8 @@ const (
CustomBucketsHistogramSamples Type = 9
// CustomBucketsFloatHistogramSamples is used to match WAL records of type Float Histogram with custom buckets.
CustomBucketsFloatHistogramSamples Type = 10
// SamplesV2 is an enhanced sample record with an encoding scheme that allows storing float samples with timestamp and an optional ST per sample.
SamplesV2 Type = 11
)
func (rt Type) String() string {
@ -66,6 +68,8 @@ func (rt Type) String() string {
return "series"
case Samples:
return "samples"
case SamplesV2:
return "samples-v2"
case Tombstones:
return "tombstones"
case Exemplars:
@ -157,12 +161,12 @@ type RefSeries struct {
Labels labels.Labels
}
// RefSample is a timestamp/value pair associated with a reference to a series.
// RefSample is a timestamp/st/value struct associated with a reference to a series.
// TODO(beorn7): Perhaps make this "polymorphic", including histogram and float-histogram pointers? Then get rid of RefHistogramSample.
type RefSample struct {
Ref chunks.HeadSeriesRef
T int64
V float64
Ref chunks.HeadSeriesRef
ST, T int64
V float64
}
// RefMetadata is the metadata associated with a series ID.
@ -182,6 +186,7 @@ type RefExemplar struct {
}
// RefHistogramSample is a histogram.
// TODO(owilliams): Add support for ST.
type RefHistogramSample struct {
Ref chunks.HeadSeriesRef
T int64
@ -189,6 +194,7 @@ type RefHistogramSample struct {
}
// RefFloatHistogramSample is a float histogram.
// TODO(owilliams): Add support for ST.
type RefFloatHistogramSample struct {
Ref chunks.HeadSeriesRef
T int64
@ -220,7 +226,7 @@ func (*Decoder) Type(rec []byte) Type {
return Unknown
}
switch t := Type(rec[0]); t {
case Series, Samples, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples:
case Series, Samples, SamplesV2, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples:
return t
}
return Unknown
@ -311,12 +317,20 @@ func (d *Decoder) DecodeLabels(dec *encoding.Decbuf) labels.Labels {
}
// Samples appends samples in rec to the given slice.
func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
dec := encoding.Decbuf{B: rec}
if Type(dec.Byte()) != Samples {
return nil, errors.New("invalid record type")
switch typ := dec.Byte(); Type(typ) {
case Samples:
return d.samplesV1(&dec, samples)
case SamplesV2:
return d.samplesV2(&dec, samples)
default:
return nil, fmt.Errorf("invalid record type %v, expected Samples(2) or SamplesV2(11)", typ)
}
}
// samplesV1 appends samples in rec to the given slice, while ignoring ST information.
func (*Decoder) samplesV1(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) {
if dec.Len() == 0 {
return samples, nil
}
@ -349,6 +363,60 @@ func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) {
return samples, nil
}
// samplesV2 appends samples in rec to the given slice using the V2 algorithm,
// which is more efficient and supports ST (See Encoder.samplesV2 definition).
//
// Record layout (the record type byte has already been consumed):
//   first sample:      varint(ref), varint(t), varint(st), be64(value)
//   following samples: varint(ref delta to prev), varint(t delta to first t),
//                      marker byte (noST|sameST|explicitST),
//                      varint(st delta to first st) only for explicitST,
//                      be64(value)
func (*Decoder) samplesV2(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) {
	if dec.Len() == 0 {
		return samples, nil
	}
	// Allow 1 byte for each varint and 8 for the value; the output slice must
	// be at least that big. Grow without discarding any samples the caller
	// already collected, since this function appends.
	if minSize := len(samples) + dec.Len()/(1+1+8); cap(samples) < minSize {
		grown := make([]RefSample, len(samples), minSize)
		copy(grown, samples)
		samples = grown
	}

	var (
		firstT, firstST int64
		// first tracks whether the next sample is the first of THIS record.
		// Using len(samples) == 0 instead would mis-decode when the caller
		// passes a non-empty slice to append to.
		first = true
	)
	for len(dec.B) > 0 && dec.Err() == nil {
		var ref, t, st int64
		if first {
			// The first sample carries absolute ref/t/st values that act as
			// the delta bases for the rest of the record.
			ref = dec.Varint64()
			firstT = dec.Varint64()
			t = firstT
			firstST = dec.Varint64()
			st = firstST
			first = false
		} else {
			prev := samples[len(samples)-1]
			ref = int64(prev.Ref) + dec.Varint64()
			t = firstT + dec.Varint64()
			switch marker := dec.Byte(); marker {
			case noST:
				// st stays 0.
			case sameST:
				st = prev.ST
			case explicitST:
				st = firstST + dec.Varint64()
			default:
				return nil, fmt.Errorf("unknown start timestamp marker %d after %d samples", marker, len(samples))
			}
		}
		val := dec.Be64()

		samples = append(samples, RefSample{
			Ref: chunks.HeadSeriesRef(ref),
			ST:  st,
			T:   t,
			V:   math.Float64frombits(val),
		})
	}

	if dec.Err() != nil {
		return nil, fmt.Errorf("decode error after %d samples: %w", len(samples), dec.Err())
	}
	if len(dec.B) > 0 {
		return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B))
	}
	return samples, nil
}
// Tombstones appends tombstones in rec to the given slice.
func (*Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) {
dec := encoding.Decbuf{B: rec}
@ -656,7 +724,11 @@ func DecodeFloatHistogram(buf *encoding.Decbuf, fh *histogram.FloatHistogram) {
// Encoder encodes series, sample, and tombstones records.
// The zero value is ready to use.
type Encoder struct{}
type Encoder struct {
// EnableSTStorage enables the SamplesV2 encoding, which is more efficient
// than V1 and supports start time per sample.
EnableSTStorage bool
}
// Series appends the encoded series to b and returns the resulting slice.
func (*Encoder) Series(series []RefSeries, b []byte) []byte {
@ -702,7 +774,16 @@ func EncodeLabels(buf *encoding.Encbuf, lbls labels.Labels) {
}
// Samples appends the encoded samples to b and returns the resulting slice.
func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
// Depending on the ST existence it either writes Samples or SamplesWithST record.
func (e *Encoder) Samples(samples []RefSample, b []byte) []byte {
if e.EnableSTStorage {
return e.samplesV2(samples, b)
}
return e.samplesV1(samples, b)
}
// Samples appends the encoded samples to b and returns the resulting slice.
func (*Encoder) samplesV1(samples []RefSample, b []byte) []byte {
buf := encoding.Encbuf{B: b}
buf.PutByte(byte(Samples))
@ -725,6 +806,56 @@ func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
return buf.Get()
}
// Marker bytes describing how a sample's start timestamp (ST) is encoded for
// every sample after the first one in a SamplesV2 record.
const (
	// Start timestamp marker values for indicating trivial cases.
	noST byte = iota // Sample has no start time (ST == 0).
	sameST // Sample's start time equals the previous sample's start time.
	explicitST // Explicit start timestamp value, stored as a delta to the record's first start time.
)
// samplesV2 appends the encoded samples to b and returns the resulting slice
// using a more efficient per-sample delta encoding and allows for ST
// storage.
func (*Encoder) samplesV2(samples []RefSample, b []byte) []byte {
	buf := encoding.Encbuf{B: b}
	buf.PutByte(byte(SamplesV2))

	if len(samples) == 0 {
		return buf.Get()
	}

	// The first sample is stored with absolute values; they serve as the
	// delta bases for the rest of the record.
	first := samples[0]
	buf.PutVarint64(int64(first.Ref))
	buf.PutVarint64(first.T)
	buf.PutVarint64(first.ST)
	buf.PutBE64(math.Float64bits(first.V))

	// Each following sample stores its ref as a delta to the previous
	// sample's ref, its timestamp as a delta to the first timestamp, and a
	// marker byte for the start timestamp so the trivial cases (no ST, same
	// ST as before) need no varint at all.
	for i, s := range samples[1:] {
		prev := samples[i] // samples[1:][i-1] == samples[i].
		buf.PutVarint64(int64(s.Ref) - int64(prev.Ref))
		buf.PutVarint64(s.T - first.T)
		switch {
		case s.ST == 0:
			buf.PutByte(noST)
		case s.ST == prev.ST:
			buf.PutByte(sameST)
		default:
			buf.PutByte(explicitST)
			buf.PutVarint64(s.ST - first.ST)
		}
		buf.PutBE64(math.Float64bits(s.V))
	}
	return buf.Get()
}
// Tombstones appends the encoded tombstones to b and returns the resulting slice.
func (*Encoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte {
buf := encoding.Encbuf{B: b}

View File

@ -76,15 +76,63 @@ func TestRecord_EncodeDecode(t *testing.T) {
require.NoError(t, err)
require.Equal(t, metadata, decMetadata)
// Without ST.
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
{Ref: 123, T: -1231, V: -123},
{Ref: 2, T: 0, V: 99999},
}
decSamples, err := dec.Samples(enc.Samples(samples, nil), nil)
encoded := enc.Samples(samples, nil)
require.Equal(t, Samples, dec.Type(encoded))
decSamples, err := dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samples, decSamples)
enc = Encoder{EnableSTStorage: true}
// Without ST again, but with V1 encoder that enables SamplesV2.
samples = []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
{Ref: 123, T: -1231, V: -123},
{Ref: 2, T: 0, V: 99999},
}
encoded = enc.Samples(samples, nil)
require.Equal(t, SamplesV2, dec.Type(encoded))
decSamples, err = dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samples, decSamples)
// With ST.
samplesWithST := []RefSample{
{Ref: 0, T: 12423423, ST: 14, V: 1.2345},
{Ref: 123, T: -1231, ST: 14, V: -123},
{Ref: 2, T: 0, ST: 14, V: 99999},
}
encoded = enc.Samples(samplesWithST, nil)
require.Equal(t, SamplesV2, dec.Type(encoded))
decSamples, err = dec.Samples(encoded, nil)
require.NoError(t, err)
require.Equal(t, samplesWithST, decSamples)
// With ST (ST[i] == T[i-1]).
samplesWithSTDelta := []RefSample{
{Ref: 0, T: 12423400, ST: 12423300, V: 1.2345},
{Ref: 123, T: 12423500, ST: 12423400, V: -123},
{Ref: 2, T: 12423600, ST: 12423500, V: 99999},
}
decSamples, err = dec.Samples(enc.Samples(samplesWithSTDelta, nil), nil)
require.NoError(t, err)
require.Equal(t, samplesWithSTDelta, decSamples)
// With ST (ST[i] == ST[i-1]).
samplesWithConstST := []RefSample{
{Ref: 0, T: 12423400, ST: 12423300, V: 1.2345},
{Ref: 123, T: 12423500, ST: 12423300, V: -123},
{Ref: 2, T: 12423600, ST: 12423300, V: 99999},
}
decSamples, err = dec.Samples(enc.Samples(samplesWithConstST, nil), nil)
require.NoError(t, err)
require.Equal(t, samplesWithConstST, decSamples)
// Intervals get split up into single entries. So we don't get back exactly
// what we put in.
tstones := []tombstones.Stone{
@ -227,252 +275,262 @@ func TestRecord_EncodeDecode(t *testing.T) {
}
func TestRecord_DecodeInvalidHistogramSchema(t *testing.T) {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
}
}
func TestRecord_DecodeInvalidFloatHistogramSchema(t *testing.T) {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{-100, 100} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Empty(t, decHistograms)
require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record")
})
}
}
}
func TestRecord_DecodeTooHighResolutionHistogramSchema(t *testing.T) {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema)
})
}
histSamples, _ := enc.HistogramSamples(histograms, nil)
decHistograms, err := dec.HistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema)
})
}
}
}
func TestRecord_DecodeTooHighResolutionFloatHistogramSchema(t *testing.T) {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) {
var enc Encoder
for _, enableSTStorage := range []bool{false, true} {
for _, schema := range []int32{9, 52} {
t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableSTStorage), func(t *testing.T) {
enc := Encoder{EnableSTStorage: enableSTStorage}
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
var output bytes.Buffer
logger := promslog.New(&promslog.Config{Writer: &output})
dec := NewDecoder(labels.NewSymbolTable(), logger)
histograms := []RefFloatHistogramSample{
{
Ref: 56,
T: 1234,
FH: &histogram.FloatHistogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: schema,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
PositiveBuckets: []float64{1, 1, -1, 0},
},
},
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema)
})
}
histSamples, _ := enc.FloatHistogramSamples(histograms, nil)
decHistograms, err := dec.FloatHistogramSamples(histSamples, nil)
require.NoError(t, err)
require.Len(t, decHistograms, 1)
require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema)
})
}
}
}
// TestRecord_Corrupted ensures that corrupted records return the correct error.
// Bugfix check for pull/521 and pull/523.
func TestRecord_Corrupted(t *testing.T) {
var enc Encoder
dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
for _, enableSTStorage := range []bool{false, true} {
enc := Encoder{EnableSTStorage: enableSTStorage}
dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
t.Run("Test corrupted series record", func(t *testing.T) {
series := []RefSeries{
{
Ref: 100,
Labels: labels.FromStrings("abc", "def", "123", "456"),
},
}
corrupted := enc.Series(series, nil)[:8]
_, err := dec.Series(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted sample record", func(t *testing.T) {
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
}
corrupted := enc.Samples(samples, nil)[:8]
_, err := dec.Samples(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted tombstone record", func(t *testing.T) {
tstones := []tombstones.Stone{
{Ref: 123, Intervals: tombstones.Intervals{
{Mint: -1000, Maxt: 1231231},
{Mint: 5000, Maxt: 0},
}},
}
corrupted := enc.Tombstones(tstones, nil)[:8]
_, err := dec.Tombstones(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted exemplar record", func(t *testing.T) {
exemplars := []RefExemplar{
{Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")},
}
corrupted := enc.Exemplars(exemplars, nil)[:8]
_, err := dec.Exemplars(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted metadata record", func(t *testing.T) {
meta := []RefMetadata{
{Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"},
}
corrupted := enc.Metadata(meta, nil)[:8]
_, err := dec.Metadata(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted histogram record", func(t *testing.T) {
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: 1,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
t.Run("Test corrupted series record", func(t *testing.T) {
series := []RefSeries{
{
Ref: 100,
Labels: labels.FromStrings("abc", "def", "123", "456"),
},
},
{
Ref: 67,
T: 5678,
H: &histogram.Histogram{
Count: 8,
ZeroThreshold: 0.001,
Sum: 35.5,
Schema: -53,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 2, Length: 2},
},
PositiveBuckets: []int64{2, -1, 2, 0},
CustomValues: []float64{0, 2, 4, 6, 8},
},
},
}
}
corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil)
corruptedHists = corruptedHists[:8]
corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil)
corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8]
_, err := dec.HistogramSamples(corruptedHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
_, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
corrupted := enc.Series(series, nil)[:8]
_, err := dec.Series(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted sample record", func(t *testing.T) {
samples := []RefSample{
{Ref: 0, T: 12423423, V: 1.2345},
}
corrupted := enc.Samples(samples, nil)[:8]
_, err := dec.Samples(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted tombstone record", func(t *testing.T) {
tstones := []tombstones.Stone{
{Ref: 123, Intervals: tombstones.Intervals{
{Mint: -1000, Maxt: 1231231},
{Mint: 5000, Maxt: 0},
}},
}
corrupted := enc.Tombstones(tstones, nil)[:8]
_, err := dec.Tombstones(corrupted, nil)
require.Equal(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted exemplar record", func(t *testing.T) {
exemplars := []RefExemplar{
{Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")},
}
corrupted := enc.Exemplars(exemplars, nil)[:8]
_, err := dec.Exemplars(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted metadata record", func(t *testing.T) {
meta := []RefMetadata{
{Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"},
}
corrupted := enc.Metadata(meta, nil)[:8]
_, err := dec.Metadata(corrupted, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
t.Run("Test corrupted histogram record", func(t *testing.T) {
histograms := []RefHistogramSample{
{
Ref: 56,
T: 1234,
H: &histogram.Histogram{
Count: 5,
ZeroCount: 2,
ZeroThreshold: 0.001,
Sum: 18.4 * rand.Float64(),
Schema: 1,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []int64{1, 1, -1, 0},
},
},
{
Ref: 67,
T: 5678,
H: &histogram.Histogram{
Count: 8,
ZeroThreshold: 0.001,
Sum: 35.5,
Schema: -53,
PositiveSpans: []histogram.Span{
{Offset: 0, Length: 2},
{Offset: 2, Length: 2},
},
PositiveBuckets: []int64{2, -1, 2, 0},
CustomValues: []float64{0, 2, 4, 6, 8},
},
},
}
corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil)
corruptedHists = corruptedHists[:8]
corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil)
corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8]
_, err := dec.HistogramSamples(corruptedHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
_, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil)
require.ErrorIs(t, err, encoding.ErrInvalidSize)
})
}
}
func TestRecord_Type(t *testing.T) {
@ -487,6 +545,16 @@ func TestRecord_Type(t *testing.T) {
recordType = dec.Type(enc.Samples(samples, nil))
require.Equal(t, Samples, recordType)
// With EnableSTStorage set, all Samples are V2.
enc = Encoder{EnableSTStorage: true}
samples = []RefSample{{Ref: 123, T: 12345, V: 1.2345}}
recordType = dec.Type(enc.Samples(samples, nil))
require.Equal(t, SamplesV2, recordType)
samplesST := []RefSample{{Ref: 123, ST: 1, T: 12345, V: 1.2345}}
recordType = dec.Type(enc.Samples(samplesST, nil))
require.Equal(t, SamplesV2, recordType)
tstones := []tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{{Mint: 1, Maxt: 2}}}}
recordType = dec.Type(enc.Tombstones(tstones, nil))
require.Equal(t, Tombstones, recordType)
@ -716,24 +784,26 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) {
make: initNHCBRefs,
},
} {
for _, labelCount := range []int{0, 10, 50} {
for _, histograms := range []int{10, 100, 1000} {
for _, buckets := range []int{0, 1, 10, 100} {
b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) {
series, samples, nhcbs := maker.make(labelCount, histograms, buckets)
enc := Encoder{}
for b.Loop() {
var buf []byte
enc.Series(series, buf)
enc.Samples(samples, buf)
var leftOver []RefHistogramSample
_, leftOver = enc.HistogramSamples(nhcbs, buf)
if len(leftOver) > 0 {
enc.CustomBucketsHistogramSamples(leftOver, buf)
for _, enableSTStorage := range []bool{false, true} {
for _, labelCount := range []int{0, 10, 50} {
for _, histograms := range []int{10, 100, 1000} {
for _, buckets := range []int{0, 1, 10, 100} {
b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) {
series, samples, nhcbs := maker.make(labelCount, histograms, buckets)
enc := Encoder{EnableSTStorage: enableSTStorage}
for b.Loop() {
var buf []byte
enc.Series(series, buf)
enc.Samples(samples, buf)
var leftOver []RefHistogramSample
_, leftOver = enc.HistogramSamples(nhcbs, buf)
if len(leftOver) > 0 {
enc.CustomBucketsHistogramSamples(leftOver, buf)
}
b.ReportMetric(float64(len(buf)), "recordBytes/ops")
}
b.ReportMetric(float64(len(buf)), "recordBytes/ops")
}
})
})
}
}
}
}

View File

@ -102,7 +102,7 @@ func DeleteTempCheckpoints(logger *slog.Logger, dir string) error {
// segmented format as the original WAL itself.
// This makes it easy to read it through the WAL package and concatenate
// it with the original WAL.
func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) {
func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64, enableSTStorage bool) (*CheckpointStats, error) {
stats := &CheckpointStats{}
var sgmReader io.ReadCloser
@ -166,7 +166,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He
metadata []record.RefMetadata
st = labels.NewSymbolTable() // Needed for decoding; labels do not outlive this function.
dec = record.NewDecoder(st, logger)
enc record.Encoder
enc = record.Encoder{EnableSTStorage: enableSTStorage}
buf []byte
recs [][]byte
@ -200,7 +200,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He
stats.TotalSeries += len(series)
stats.DroppedSeries += len(series) - len(repl)
case record.Samples:
case record.Samples, record.SamplesV2:
samples, err = dec.Samples(rec, samples)
if err != nil {
return nil, fmt.Errorf("decode samples: %w", err)

View File

@ -171,251 +171,257 @@ func TestCheckpoint(t *testing.T) {
}
}
for _, compress := range compression.Types() {
t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) {
dir := t.TempDir()
for _, enableSTStorage := range []bool{false, true} {
for _, compress := range compression.Types() {
t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableSTStorage), func(t *testing.T) {
dir := t.TempDir()
var enc record.Encoder
// Create a dummy segment to bump the initial number.
seg, err := CreateSegment(dir, 100)
require.NoError(t, err)
require.NoError(t, seg.Close())
// Manually create checkpoint for 99 and earlier.
w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress)
require.NoError(t, err)
// Add some data we expect to be around later.
err = w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")},
}, nil))
require.NoError(t, err)
// Log an unknown record, that might have come from a future Prometheus version.
require.NoError(t, w.Log([]byte{255}))
require.NoError(t, w.Close())
// Start a WAL and write records to it as usual.
w, err = NewSize(nil, nil, dir, 128*1024, compress)
require.NoError(t, err)
samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0
var last int64
for i := 0; ; i++ {
_, n, err := Segments(w.Dir())
enc := record.Encoder{EnableSTStorage: enableSTStorage}
// Create a dummy segment to bump the initial number.
seg, err := CreateSegment(dir, 100)
require.NoError(t, err)
if n >= 106 {
break
}
// Write some series initially.
if i == 0 {
b := enc.Series([]record.RefSeries{
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
{Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")},
require.NoError(t, seg.Close())
// Manually create checkpoint for 99 and earlier.
w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress)
require.NoError(t, err)
// Add some data we expect to be around later.
err = w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")},
}, nil))
require.NoError(t, err)
// Log an unknown record, that might have come from a future Prometheus version.
require.NoError(t, w.Log([]byte{255}))
require.NoError(t, w.Close())
// Start a WAL and write records to it as usual.
w, err = NewSize(nil, nil, dir, 128*1024, compress)
require.NoError(t, err)
samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0
var last int64
for i := 0; ; i++ {
_, n, err := Segments(w.Dir())
require.NoError(t, err)
if n >= 106 {
break
}
// Write some series initially.
if i == 0 {
b := enc.Series([]record.RefSeries{
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
{Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")},
}, nil)
require.NoError(t, w.Log(b))
b = enc.Metadata([]record.RefMetadata{
{Ref: 2, Unit: "unit", Help: "help"},
{Ref: 3, Unit: "unit", Help: "help"},
{Ref: 4, Unit: "unit", Help: "help"},
{Ref: 5, Unit: "unit", Help: "help"},
}, nil)
require.NoError(t, w.Log(b))
}
// Write samples until the WAL has enough segments.
// Make them have drifting timestamps within a record to see that they
// get filtered properly.
b := enc.Samples([]record.RefSample{
{Ref: 0, T: last, V: float64(i)},
{Ref: 1, T: last + 10000, V: float64(i)},
{Ref: 2, T: last + 20000, V: float64(i)},
{Ref: 3, T: last + 30000, V: float64(i)},
}, nil)
require.NoError(t, w.Log(b))
samplesInWAL += 4
h := makeHistogram(i)
b, _ = enc.HistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: h},
{Ref: 1, T: last + 10000, H: h},
{Ref: 2, T: last + 20000, H: h},
{Ref: 3, T: last + 30000, H: h},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
cbh := makeCustomBucketHistogram(i)
b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: cbh},
{Ref: 1, T: last + 10000, H: cbh},
{Ref: 2, T: last + 20000, H: cbh},
{Ref: 3, T: last + 30000, H: cbh},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
fh := makeFloatHistogram(i)
b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: fh},
{Ref: 1, T: last + 10000, FH: fh},
{Ref: 2, T: last + 20000, FH: fh},
{Ref: 3, T: last + 30000, FH: fh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
cbfh := makeCustomBucketFloatHistogram(i)
b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: cbfh},
{Ref: 1, T: last + 10000, FH: cbfh},
{Ref: 2, T: last + 20000, FH: cbfh},
{Ref: 3, T: last + 30000, FH: cbfh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
b = enc.Exemplars([]record.RefExemplar{
{Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))},
}, nil)
require.NoError(t, w.Log(b))
// Write changing metadata for each series. In the end, only the latest
// version should end up in the checkpoint.
b = enc.Metadata([]record.RefMetadata{
{Ref: 2, Unit: "unit", Help: "help"},
{Ref: 3, Unit: "unit", Help: "help"},
{Ref: 4, Unit: "unit", Help: "help"},
{Ref: 5, Unit: "unit", Help: "help"},
{Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
}, nil)
require.NoError(t, w.Log(b))
last += 100
}
// Write samples until the WAL has enough segments.
// Make them have drifting timestamps within a record to see that they
// get filtered properly.
b := enc.Samples([]record.RefSample{
{Ref: 0, T: last, V: float64(i)},
{Ref: 1, T: last + 10000, V: float64(i)},
{Ref: 2, T: last + 20000, V: float64(i)},
{Ref: 3, T: last + 30000, V: float64(i)},
}, nil)
require.NoError(t, w.Log(b))
samplesInWAL += 4
h := makeHistogram(i)
b, _ = enc.HistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: h},
{Ref: 1, T: last + 10000, H: h},
{Ref: 2, T: last + 20000, H: h},
{Ref: 3, T: last + 30000, H: h},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
cbh := makeCustomBucketHistogram(i)
b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{
{Ref: 0, T: last, H: cbh},
{Ref: 1, T: last + 10000, H: cbh},
{Ref: 2, T: last + 20000, H: cbh},
{Ref: 3, T: last + 30000, H: cbh},
}, nil)
require.NoError(t, w.Log(b))
histogramsInWAL += 4
fh := makeFloatHistogram(i)
b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: fh},
{Ref: 1, T: last + 10000, FH: fh},
{Ref: 2, T: last + 20000, FH: fh},
{Ref: 3, T: last + 30000, FH: fh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
cbfh := makeCustomBucketFloatHistogram(i)
b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{
{Ref: 0, T: last, FH: cbfh},
{Ref: 1, T: last + 10000, FH: cbfh},
{Ref: 2, T: last + 20000, FH: cbfh},
{Ref: 3, T: last + 30000, FH: cbfh},
}, nil)
require.NoError(t, w.Log(b))
floatHistogramsInWAL += 4
require.NoError(t, w.Close())
b = enc.Exemplars([]record.RefExemplar{
{Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))},
}, nil)
require.NoError(t, w.Log(b))
stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool {
return x%2 == 0
}, last/2, enableSTStorage)
require.NoError(t, err)
require.NoError(t, w.Truncate(107))
require.NoError(t, DeleteCheckpoints(w.Dir(), 106))
require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples)
require.Positive(t, stats.DroppedSamples)
// Write changing metadata for each series. In the end, only the latest
// version should end up in the checkpoint.
b = enc.Metadata([]record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
{Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)},
}, nil)
require.NoError(t, w.Log(b))
// Only the new checkpoint should be left.
files, err := os.ReadDir(dir)
require.NoError(t, err)
require.Len(t, files, 1)
require.Equal(t, "checkpoint.00000106", files[0].Name())
last += 100
}
require.NoError(t, w.Close())
sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106"))
require.NoError(t, err)
defer sr.Close()
stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool {
return x%2 == 0
}, last/2)
require.NoError(t, err)
require.NoError(t, w.Truncate(107))
require.NoError(t, DeleteCheckpoints(w.Dir(), 106))
require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples)
require.Positive(t, stats.DroppedSamples)
dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
var series []record.RefSeries
var metadata []record.RefMetadata
r := NewReader(sr)
// Only the new checkpoint should be left.
files, err := os.ReadDir(dir)
require.NoError(t, err)
require.Len(t, files, 1)
require.Equal(t, "checkpoint.00000106", files[0].Name())
samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0
for r.Next() {
rec := r.Record()
sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106"))
require.NoError(t, err)
defer sr.Close()
dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
var series []record.RefSeries
var metadata []record.RefMetadata
r := NewReader(sr)
samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0
for r.Next() {
rec := r.Record()
switch dec.Type(rec) {
case record.Series:
series, err = dec.Series(rec, series)
require.NoError(t, err)
case record.Samples:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
for _, s := range samples {
require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp")
switch dec.Type(rec) {
case record.Series:
series, err = dec.Series(rec, series)
require.NoError(t, err)
case record.Samples, record.SamplesV2:
samples, err := dec.Samples(rec, nil)
require.NoError(t, err)
for _, s := range samples {
require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp")
}
samplesInCheckpoint += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
histograms, err := dec.HistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range histograms {
require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp")
}
histogramsInCheckpoint += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
floatHistograms, err := dec.FloatHistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range floatHistograms {
require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp")
}
floatHistogramsInCheckpoint += len(floatHistograms)
case record.Exemplars:
exemplars, err := dec.Exemplars(rec, nil)
require.NoError(t, err)
for _, e := range exemplars {
require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp")
}
case record.Metadata:
metadata, err = dec.Metadata(rec, metadata)
require.NoError(t, err)
}
samplesInCheckpoint += len(samples)
case record.HistogramSamples, record.CustomBucketsHistogramSamples:
histograms, err := dec.HistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range histograms {
require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp")
}
histogramsInCheckpoint += len(histograms)
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples:
floatHistograms, err := dec.FloatHistogramSamples(rec, nil)
require.NoError(t, err)
for _, h := range floatHistograms {
require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp")
}
floatHistogramsInCheckpoint += len(floatHistograms)
case record.Exemplars:
exemplars, err := dec.Exemplars(rec, nil)
require.NoError(t, err)
for _, e := range exemplars {
require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp")
}
case record.Metadata:
metadata, err = dec.Metadata(rec, metadata)
require.NoError(t, err)
}
}
require.NoError(t, r.Err())
// Making sure we replayed some samples. We expect >50% samples to be still present.
require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5)
require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8)
require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5)
require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8)
require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5)
require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8)
require.NoError(t, r.Err())
// Making sure we replayed some samples. We expect >50% samples to be still present.
require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5)
require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8)
require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5)
require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8)
require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5)
require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8)
expectedRefSeries := []record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
}
testutil.RequireEqual(t, expectedRefSeries, series)
expectedRefSeries := []record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")},
{Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")},
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
}
testutil.RequireEqual(t, expectedRefSeries, series)
expectedRefMetadata := []record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 4, Unit: "unit", Help: "help"},
}
sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref })
require.Equal(t, expectedRefMetadata, metadata)
})
expectedRefMetadata := []record.RefMetadata{
{Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)},
{Ref: 4, Unit: "unit", Help: "help"},
}
sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref })
require.Equal(t, expectedRefMetadata, metadata)
})
}
}
}
func TestCheckpointNoTmpFolderAfterError(t *testing.T) {
// Create a new wlog with invalid data.
dir := t.TempDir()
w, err := NewSize(nil, nil, dir, 64*1024, compression.None)
require.NoError(t, err)
var enc record.Encoder
require.NoError(t, w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")},
}, nil)))
require.NoError(t, w.Close())
for _, enableSTStorage := range []bool{false, true} {
t.Run("enableSTStorage="+strconv.FormatBool(enableSTStorage), func(t *testing.T) {
// Create a new wlog with invalid data.
dir := t.TempDir()
w, err := NewSize(nil, nil, dir, 64*1024, compression.None)
require.NoError(t, err)
enc := record.Encoder{EnableSTStorage: enableSTStorage}
require.NoError(t, w.Log(enc.Series([]record.RefSeries{
{Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")},
}, nil)))
require.NoError(t, w.Close())
// Corrupt data.
f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666)
require.NoError(t, err)
_, err = f.WriteAt([]byte{42}, 1)
require.NoError(t, err)
require.NoError(t, f.Close())
// Corrupt data.
f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666)
require.NoError(t, err)
_, err = f.WriteAt([]byte{42}, 1)
require.NoError(t, err)
require.NoError(t, f.Close())
// Run the checkpoint and since the wlog contains corrupt data this should return an error.
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0)
require.Error(t, err)
// Run the checkpoint and since the wlog contains corrupt data this should return an error.
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0, enableSTStorage)
require.Error(t, err)
// Walk the wlog dir to make sure there are no tmp folder left behind after the error.
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("access err %q: %w", path, err)
}
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name())
}
return nil
})
require.NoError(t, err)
// Walk the wlog dir to make sure there are no tmp folder left behind after the error.
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("access err %q: %w", path, err)
}
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name())
}
return nil
})
require.NoError(t, err)
})
}
}
func TestCheckpointDeletesTemporaryCheckpoints(t *testing.T) {
@ -428,7 +434,7 @@ func TestCheckpointDeletesTemporaryCheckpoints(t *testing.T) {
require.NoError(t, err)
defer w.Close()
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1000, func(_ chunks.HeadSeriesRef) bool { return true }, 1000)
_, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1000, func(_ chunks.HeadSeriesRef) bool { return true }, 1000, false)
require.NoError(t, err)
files, err := os.ReadDir(dir)

View File

@ -543,7 +543,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
}
w.writer.StoreSeries(series, segmentNum)
case record.Samples:
case record.Samples, record.SamplesV2:
// If we're not tailing a segment we can ignore any samples records we see.
// This speeds up replay of the WAL by > 10x.
if !tail {

File diff suppressed because it is too large Load Diff

View File

@ -16,10 +16,21 @@ package kahansum
import "math"
// Inc performs addition of two floating-point numbers using the Kahan summation algorithm.
// We get incorrect results if this function is inlined; see https://github.com/prometheus/prometheus/issues/16714.
//
//go:noinline
func Inc(inc, sum, c float64) (newSum, newC float64) {
// We've seen Kahan summation return less accurate results when Inc function is
// allowed to be inlined (see https://github.com/prometheus/prometheus/pull/16895).
// Go permits fusing float operations (e.g. using fused multiply-add, which allows
// calculating a*b+c without rounding the result of a*b to precision available in float64),
// and Kahan sum is sensitive to float rounding behavior. Instead of forbidding inlining
// (which only disallows fusing operations outside of Inc with operations happening inside)
// and eating the performance cost of non-inlined function calls, we forbid just the fusing
// across Inc call boundary. We can do that by explicitly requesting Inc arguments and results
// to be rounded to float64 precision, as documented in go spec (https://go.dev/ref/spec#Floating_point_operators).
// The following casts are not no-ops!
inc = float64(inc)
sum = float64(sum)
c = float64(c)
t := sum + inc
switch {
case math.IsInf(t, 0):
@ -31,6 +42,9 @@ func Inc(inc, sum, c float64) (newSum, newC float64) {
default:
c += (inc - t) + sum
}
t = float64(t)
c = float64(c)
return t, c
}

96
util/testrecord/record.go Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2025 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package testrecord
import (
"math"
"testing"
"github.com/prometheus/prometheus/tsdb/chunks"
"github.com/prometheus/prometheus/tsdb/record"
)
// RefSamplesCase identifies a predefined shape of generated RefSample data
// used by tests and benchmarks (e.g. for WAL record encoding).
type RefSamplesCase string

const (
	// Realistic1000Samples is 1000 samples across series with identical timestamps and no start timestamps.
	Realistic1000Samples RefSamplesCase = "real1000"
	// Realistic1000WithVariableSTSamples is 1000 samples with start timestamps varying across series.
	Realistic1000WithVariableSTSamples RefSamplesCase = "real1000-vst"
	// Realistic1000WithConstSTSamples is 1000 samples sharing one start timestamp.
	Realistic1000WithConstSTSamples RefSamplesCase = "real1000-cst"
	// WorstCase1000 is 1000 samples with timestamps alternating between extremes, defeating delta encoding.
	WorstCase1000 RefSamplesCase = "worst1000"
	// WorstCase1000WithSTSamples is WorstCase1000 plus similarly adversarial start timestamps.
	WorstCase1000WithSTSamples RefSamplesCase = "worst1000-st"
)
// GenTestRefSamplesCase returns 1000 record.RefSample entries shaped
// according to the requested case c. It fails the test for an unknown case.
func GenTestRefSamplesCase(t testing.TB, c RefSamplesCase) []record.RefSample {
	t.Helper()

	const n = 1e3
	samples := make([]record.RefSample, n)
	switch c {
	case Realistic1000Samples:
		// Samples are across series, so likely all have the same timestamp.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				T:   12423423,
				V:   highVarianceFloat(idx),
			}
		}
	case Realistic1000WithConstSTSamples:
		// Likely the start times will all be the same with deltas.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  12423423,
				T:   12423423 + 15,
				V:   highVarianceFloat(idx),
			}
		}
	case Realistic1000WithVariableSTSamples:
		// Maybe series have different start times though.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  int64((12423423 / 9) * (idx % 10)),
				T:   12423423,
				V:   highVarianceFloat(idx),
			}
		}
	case WorstCase1000:
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				T:   highVarianceInt(idx),
				V:   highVarianceFloat(idx),
			}
		}
	case WorstCase1000WithSTSamples:
		// Worst case is when the values are significantly different
		// to each other which breaks delta encoding.
		for idx := range samples {
			samples[idx] = record.RefSample{
				Ref: chunks.HeadSeriesRef(idx),
				ST:  highVarianceInt(idx+1) / 1024, // Make sure ST is not comparable to T.
				T:   highVarianceInt(idx),
				V:   highVarianceFloat(idx),
			}
		}
	default:
		t.Fatal("unknown case", c)
	}
	return samples
}
func highVarianceInt(i int) int64 {
if i%2 == 0 {
return math.MinInt32
}
return math.MaxInt32
}
func highVarianceFloat(i int) float64 {
if i%2 == 0 {
return math.SmallestNonzeroFloat32
}
return math.MaxFloat32
}

View File

@ -48,6 +48,8 @@ type RecordsCase struct {
// HistogramFn source histogram for histogram and float histogram records.
// By default, newTestHist is used (exponential bucketing)
HistogramFn func(ref int) *histogram.Histogram
// NoST controls if ref samples should skip generating Start Timestamps. If true, ST is 0.
NoST bool
}
// Records represents batches of generated WAL records.
@ -118,10 +120,18 @@ func GenerateRecords(c RecordsCase) (ret Records) {
Help: fmt.Sprintf("help text for %d", ref),
}
for j := range c.SamplesPerSeries {
ts := c.TsFn(ref, j)
// Keep ST simple for now; we don't test the exact semantics.
// We can improve later (e.g. STsFN).
sts := ts - 1
if c.NoST {
sts = 0
}
ret.Samples[i*c.SamplesPerSeries+j] = record.RefSample{
Ref: chunks.HeadSeriesRef(ref),
T: c.TsFn(ref, j),
V: float64(ref),
ST: sts, T: ts,
V: float64(ref),
}
}
h := c.HistogramFn(ref)

View File

@ -1257,7 +1257,7 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_avg(v instant-vector)</code> returns the arithmetic average of observed values stored in each
histogram sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
native histogram sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
@ -1283,13 +1283,13 @@ const funcDocs: Record<string, React.ReactNode> = {
histogram_count: (
<>
<p>
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each histogram sample
in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each native histogram
sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each histogram
sample.
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each native
histogram sample.
</p>
<p>
@ -1574,15 +1574,15 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_stddev(v instant-vector)</code> returns the estimated standard deviation of observations for
each histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to have
the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries, the
arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
each native histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to
have the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries,
the arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_stdvar(v instant-vector)</code> returns the estimated standard variance of
observations for each histogram sample in <code>v</code>.
observations for each native histogram sample in <code>v</code>.
</p>
</>
),
@ -1590,28 +1590,28 @@ const funcDocs: Record<string, React.ReactNode> = {
<>
<p>
<code>histogram_stddev(v instant-vector)</code> returns the estimated standard deviation of observations for
each histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to have
the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries, the
arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
each native histogram sample in <code>v</code>. For this estimation, all observations in a bucket are assumed to
have the value of the mean of the bucket boundaries. For the zero bucket and for buckets with custom boundaries,
the arithmetic mean is used. For the usual exponential buckets, the geometric mean is used. Float samples are
ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_stdvar(v instant-vector)</code> returns the estimated standard variance of
observations for each histogram sample in <code>v</code>.
observations for each native histogram sample in <code>v</code>.
</p>
</>
),
histogram_sum: (
<>
<p>
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each histogram sample
in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
<code>histogram_count(v instant-vector)</code> returns the count of observations stored in each native histogram
sample in <code>v</code>. Float samples are ignored and do not show up in the returned vector.
</p>
<p>
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each histogram
sample.
Similarly, <code>histogram_sum(v instant-vector)</code> returns the sum of observations stored in each native
histogram sample.
</p>
<p>

View File

@ -317,10 +317,16 @@ export const functionIdentifierTerms = [
info: 'Join together label values into new label',
type: 'function',
},
{
label: 'first_over_time',
detail: 'function',
info: 'Return the value of the oldest sample in the specified interval',
type: 'function',
},
{
label: 'last_over_time',
detail: 'function',
info: 'The most recent point value in specified interval.',
info: 'Return the value of the most recent sample in the specified interval',
type: 'function',
},
{
@ -371,6 +377,12 @@ export const functionIdentifierTerms = [
info: 'Return the timestamp of the minimum value over time for input series',
type: 'function',
},
{
label: 'ts_of_first_over_time',
detail: 'function',
info: 'Return the timestamp of the first value over time for input series',
type: 'function',
},
{
label: 'ts_of_last_over_time',
detail: 'function',

View File

@ -253,6 +253,11 @@ func (h *Handler) ApplyConfig(conf *config.Config) error {
defer h.mtx.Unlock()
h.config = conf
if conf.StorageConfig.TSDBConfig != nil && conf.StorageConfig.TSDBConfig.Retention != nil {
h.options.TSDBRetentionDuration = conf.StorageConfig.TSDBConfig.Retention.Time
h.options.TSDBMaxBytes = conf.StorageConfig.TSDBConfig.Retention.Size
h.options.TSDBMaxPercentage = conf.StorageConfig.TSDBConfig.Retention.Percentage
}
return nil
}
@ -866,20 +871,25 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) {
status.Hostname = hostname
status.ServerTime = time.Now().UTC()
if h.options.TSDBRetentionDuration != 0 {
status.StorageRetention = h.options.TSDBRetentionDuration.String()
h.mtx.RLock()
tsdbRetentionDuration := h.options.TSDBRetentionDuration
tsdbMaxBytes := h.options.TSDBMaxBytes
tsdbMaxPercentage := h.options.TSDBMaxPercentage
h.mtx.RUnlock()
if tsdbRetentionDuration != 0 {
status.StorageRetention = tsdbRetentionDuration.String()
}
if h.options.TSDBMaxBytes != 0 {
if tsdbMaxBytes != 0 {
if status.StorageRetention != "" {
status.StorageRetention += " or "
}
status.StorageRetention += h.options.TSDBMaxBytes.String()
status.StorageRetention += tsdbMaxBytes.String()
}
if h.options.TSDBMaxPercentage != 0 {
if tsdbMaxPercentage != 0 {
if status.StorageRetention != "" {
status.StorageRetention += " or "
}
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(h.options.TSDBMaxPercentage), 10) + "%"
status.StorageRetention = status.StorageRetention + strconv.FormatUint(uint64(tsdbMaxPercentage), 10) + "%"
}
metrics, err := h.gatherer.Gather()