From 664b25569945aa21f8cae8dfd4d83891aa668cef Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Wed, 21 Jan 2026 08:21:56 +0000 Subject: [PATCH] Merge pull request #17867 from prometheus/bwplotka/a2-scrape-1 refactor(appenderV2)[PART5a]: add AppendableV2 support to scrape loop + tests --- rules/manager_test.go | 5 +- scrape/helpers_test.go | 93 +- scrape/manager.go | 5 +- scrape/manager_test.go | 4 +- scrape/scrape.go | 68 +- scrape/scrape_append_v2.go | 416 +++++++++ scrape/scrape_test.go | 1309 +++++++++++++++++++++-------- scrape/target.go | 99 +++ scrape/target_test.go | 212 +++-- util/teststorage/appender.go | 94 ++- util/teststorage/appender_test.go | 75 +- util/teststorage/storage.go | 28 +- 12 files changed, 1878 insertions(+), 530 deletions(-) create mode 100644 scrape/scrape_append_v2.go diff --git a/rules/manager_test.go b/rules/manager_test.go index 0991e8198a..a716304b7a 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -45,6 +45,7 @@ import ( "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" @@ -1201,7 +1202,9 @@ func TestRuleMovedBetweenGroups(t *testing.T) { t.Skip("skipping test in short mode.") } - storage := teststorage.New(t, 600000) + storage := teststorage.New(t, func(opt *tsdb.Options) { + opt.OutOfOrderTimeWindow = 600000 + }) defer storage.Close() opts := promql.EngineOpts{ Logger: nil, diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index dd5179b360..1db229561d 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "encoding/binary" + "fmt" "net/http" "testing" "time" @@ -38,15 +39,22 @@ import ( // For readability. 
type sample = teststorage.Sample +type compatAppendable interface { + storage.Appendable + storage.AppendableV2 +} + func withCtx(ctx context.Context) func(sl *scrapeLoop) { return func(sl *scrapeLoop) { sl.ctx = ctx } } -func withAppendable(appendable storage.Appendable) func(sl *scrapeLoop) { +func withAppendable(app compatAppendable, appV2 bool) func(sl *scrapeLoop) { return func(sl *scrapeLoop) { - sl.appendable = appendable + sa := selectAppendable(app, appV2) + sl.appendable = sa.V1() + sl.appendableV2 = sa.V2() } } @@ -55,8 +63,7 @@ func withAppendable(appendable storage.Appendable) func(sl *scrapeLoop) { // // It's recommended to use withXYZ functions for simple option customizations, e.g: // -// appTest := teststorage.NewAppendable() -// sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) +// sl, _ := newTestScrapeLoop(t, withCtx(customCtx)) // // However, when changing more than one scrapeLoop options it's more readable to have one explicit opt function: // @@ -64,7 +71,7 @@ func withAppendable(appendable storage.Appendable) func(sl *scrapeLoop) { // appTest := teststorage.NewAppendable() // sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { // sl.ctx = ctx -// sl.appendable = appTest +// sl.appendableV2 = appTest // // Since we're writing samples directly below we need to provide a protocol fallback. 
// sl.fallbackScrapeProtocol = "text/plain" // }) @@ -84,8 +91,6 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo timeout: 1 * time.Hour, sampleMutator: nopMutator, reportSampleMutator: nopMutator, - - appendable: teststorage.NewAppendable(), buffers: pool.New(1e3, 1e6, 3, func(sz int) any { return make([]byte, 0, sz) }), metrics: metrics, maxSchema: histogram.ExponentialSchemaMax, @@ -98,6 +103,11 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo for _, o := range opts { o(sl) } + + if sl.appendable != nil && sl.appendableV2 != nil { + t.Fatal("select the appendable to use, both were passed, likely a bug") + } + // Validate user opts for convenience. require.Nil(t, sl.parentCtx, "newTestScrapeLoop does not support injecting non-nil parent context") require.Nil(t, sl.appenderCtx, "newTestScrapeLoop does not support injecting non-nil appender context") @@ -121,7 +131,8 @@ func newTestScrapeLoop(t testing.TB, opts ...func(sl *scrapeLoop)) (_ *scrapeLoo return sl, scraper } -func newTestScrapePool(t *testing.T, injectNewLoop func(options scrapeLoopOptions) loop) *scrapePool { +func newTestScrapePool(t *testing.T, app compatAppendable, appV2 bool, injectNewLoop func(options scrapeLoopOptions) loop) *scrapePool { + sa := selectAppendable(app, appV2) return &scrapePool{ ctx: t.Context(), cancel: func() {}, @@ -134,7 +145,8 @@ func newTestScrapePool(t *testing.T, injectNewLoop func(options scrapeLoopOption loops: map[uint64]loop{}, injectTestNewLoop: injectNewLoop, - appendable: teststorage.NewAppendable(), + appendable: sa.V1(), appendableV2: sa.V2(), + symbolTable: labels.NewSymbolTable(), metrics: newTestScrapeMetrics(t), } @@ -158,3 +170,66 @@ func protoMarshalDelimited(t *testing.T, mf *dto.MetricFamily) []byte { buf.Write(protoBuf) return buf.Bytes() } + +type selectedAppendable struct { + useV2 bool + app compatAppendable +} + +// V1 returns Appendable if V1 is selected, otherwise nil. 
+func (s selectedAppendable) V1() storage.Appendable { + if s.useV2 { + return nil + } + return s.app +} + +// V2 returns AppendableV2 if V2 is selected, otherwise nil. +func (s selectedAppendable) V2() storage.AppendableV2 { + if !s.useV2 { + return nil + } + return s.app +} + +// selectAppendable allows to specify which appendable callers should use when the struct +// implements both. This is how all callers are making the decision - if one appendable is nil, they +// take another. selectAppendable allows to inject nil to e.g. storage.AppendableV2 when appV2 is false. +func selectAppendable(app compatAppendable, appV2 bool) selectedAppendable { + s := selectedAppendable{ + app: app, + useV2: appV2, + } + return s +} + +func foreachAppendable(t *testing.T, f func(t *testing.T, appV2 bool)) { + for _, appV2 := range []bool{false, true} { + t.Run(fmt.Sprintf("appV2=%v", appV2), func(t *testing.T) { + f(t, appV2) + }) + } +} + +func TestSelectAppendable(t *testing.T) { + var i int + foreachAppendable(t, func(t *testing.T, appV2 bool) { + defer func() { i++ }() + switch i { + case 0: + require.False(t, appV2) + + s := selectAppendable(teststorage.NewAppendable(), appV2) + require.NotNil(t, s.V1()) + require.Nil(t, s.V2()) + case 1: + require.True(t, appV2) + + s := selectAppendable(teststorage.NewAppendable(), appV2) + require.Nil(t, s.V1()) + require.NotNil(t, s.V2()) + default: + t.Fatal("too many iterations") + } + }) +} diff --git a/scrape/manager.go b/scrape/manager.go index a2297aa824..ef226ad507 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -114,7 +114,8 @@ type Manager struct { opts *Options logger *slog.Logger - appendable storage.Appendable + appendable storage.Appendable + appendableV2 storage.AppendableV2 graceShut chan struct{} @@ -196,7 +197,7 @@ func (m *Manager) reload() { continue } m.metrics.targetScrapePools.Inc() - sp, err := newScrapePool(scrapeConfig, m.appendable, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, 
m.opts, m.metrics) + sp, err := newScrapePool(scrapeConfig, m.appendable, m.appendableV2, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) if err != nil { m.metrics.targetScrapePoolsFailed.Inc() m.logger.Error("error creating new scrape pool", "err", err, "scrape_pool", setName) diff --git a/scrape/manager_test.go b/scrape/manager_test.go index d4898eb996..8b289cb7e2 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -528,7 +528,7 @@ scrape_configs: ch <- struct{}{} return noopLoop() } - sp := newTestScrapePool(t, newLoop) + sp := newTestScrapePool(t, nil, false, newLoop) sp.activeTargets[1] = &Target{} sp.loops[1] = noopLoop() sp.config = cfg1.ScrapeConfigs[0] @@ -684,7 +684,7 @@ scrape_configs: _, cancel := context.WithCancel(context.Background()) defer cancel() - sp := newTestScrapePool(t, newLoop) + sp := newTestScrapePool(t, nil, false, newLoop) sp.loops[1] = noopLoop() sp.config = cfg1.ScrapeConfigs[0] sp.metrics = scrapeManager.metrics diff --git a/scrape/scrape.go b/scrape/scrape.go index 58df858b3d..d5a9ba72b4 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -82,11 +82,12 @@ type FailureLogger interface { // scrapePool manages scrapes for sets of targets. type scrapePool struct { - appendable storage.Appendable - logger *slog.Logger - ctx context.Context - cancel context.CancelFunc - options *Options + appendable storage.Appendable + appendableV2 storage.AppendableV2 + logger *slog.Logger + ctx context.Context + cancel context.CancelFunc + options *Options // mtx must not be taken after targetMtx. 
mtx sync.Mutex @@ -139,6 +140,7 @@ type scrapeLoopAppendAdapter interface { func newScrapePool( cfg *config.ScrapeConfig, appendable storage.Appendable, + appendableV2 storage.AppendableV2, offsetSeed uint64, logger *slog.Logger, buffers *pool.Pool, @@ -171,6 +173,7 @@ func newScrapePool( ctx, cancel := context.WithCancel(context.Background()) sp := &scrapePool{ appendable: appendable, + appendableV2: appendableV2, logger: logger, ctx: ctx, cancel: cancel, @@ -842,11 +845,12 @@ type scrapeLoop struct { scraper scraper // Static params per scrapePool. - appendable storage.Appendable - buffers *pool.Pool - offsetSeed uint64 - symbolTable *labels.SymbolTable - metrics *scrapeMetrics + appendable storage.Appendable + appendableV2 storage.AppendableV2 + buffers *pool.Pool + offsetSeed uint64 + symbolTable *labels.SymbolTable + metrics *scrapeMetrics // Options from config.ScrapeConfig. sampleLimit int @@ -1190,11 +1194,12 @@ func newScrapeLoop(opts scrapeLoopOptions) *scrapeLoop { scraper: opts.scraper, // Static params per scrapePool. - appendable: opts.sp.appendable, - buffers: opts.sp.buffers, - offsetSeed: opts.sp.offsetSeed, - symbolTable: opts.sp.symbolTable, - metrics: opts.sp.metrics, + appendable: opts.sp.appendable, + appendableV2: opts.sp.appendableV2, + buffers: opts.sp.buffers, + offsetSeed: opts.sp.offsetSeed, + symbolTable: opts.sp.symbolTable, + metrics: opts.sp.metrics, // config.ScrapeConfig. sampleLimit: int(opts.sp.config.SampleLimit), @@ -1303,7 +1308,9 @@ mainLoop: } func (sl *scrapeLoop) appender() scrapeLoopAppendAdapter { - // NOTE(bwplotka): Add AppenderV2 implementation, see https://github.com/prometheus/prometheus/issues/17632. 
+ if sl.appendableV2 != nil { + return &scrapeLoopAppenderV2{scrapeLoop: sl, AppenderV2: sl.appendableV2.AppenderV2(sl.appenderCtx)} + } return &scrapeLoopAppender{scrapeLoop: sl, Appender: sl.appendable.Appender(sl.appenderCtx)} } @@ -1637,7 +1644,7 @@ loop: break } switch et { - // TODO(bwplotka): Consider changing parser to give metadata at once instead of type, help and unit in separation, ideally on `Series()/Histogram() + // TODO(bwplotka): Consider changing parser to give metadata at once instead of type, help and unit in separation, ideally on `Series()/Histogram()` // otherwise we can expose metadata without series on metadata API. case textparse.EntryType: // TODO(bwplotka): Build meta entry directly instead of locking and updating the map. This will @@ -1753,7 +1760,7 @@ loop: } } - sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) + sampleAdded, err = sl.checkAddError(met, nil, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { sl.l.Debug("Unexpected error", "series", string(met), "err", err) @@ -1829,7 +1836,7 @@ loop: if !seriesCached || lastMeta.lastIterChange == sl.cache.iter { // In majority cases we can trust that the current series/histogram is matching the lastMeta and lastMFName. // However, optional TYPE etc metadata and broken OM text can break this, detect those cases here. - // TODO(bwplotka): Consider moving this to parser as many parser users end up doing this (e.g. ST and NHCB parsing). + // TODO(https://github.com/prometheus/prometheus/issues/17900): Move this to text and OM parser. if isSeriesPartOfFamily(lset.Get(model.MetricNameLabel), lastMFName, lastMeta.Type) { if _, merr := app.UpdateMetadata(ref, lset, lastMeta.Metadata); merr != nil { // No need to fail the scrape on errors appending metadata. 
@@ -1871,6 +1878,7 @@ loop: return total, added, seriesAdded, err } +// TODO(https://github.com/prometheus/prometheus/issues/17900): Move this to text and OM parser. func isSeriesPartOfFamily(mName string, mfName []byte, typ model.MetricType) bool { mfNameStr := yoloString(mfName) if !strings.HasPrefix(mName, mfNameStr) { // Fast path. @@ -1942,7 +1950,7 @@ func isSeriesPartOfFamily(mName string, mfName []byte, typ model.MetricType) boo // during normal operation (e.g., accidental cardinality explosion, sudden traffic spikes). // Current case ordering prevents exercising other cases when limits are exceeded. // Remaining error cases typically occur only a few times, often during initial setup. -func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (sampleAdded bool, _ error) { +func (sl *scrapeLoop) checkAddError(met []byte, exemplars []exemplar.Exemplar, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (sampleAdded bool, _ error) { switch { case err == nil: return true, nil @@ -1974,6 +1982,26 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke case errors.Is(err, storage.ErrNotFound): return false, storage.ErrNotFound default: + // If nothing from the above, check for partial errors. Do this here to not alloc the pErr on a hot path. + var pErr *storage.AppendPartialError + if errors.As(err, &pErr) { + outOfOrderExemplars := 0 + for _, e := range pErr.ExemplarErrors { + if errors.Is(e, storage.ErrOutOfOrderExemplar) { + outOfOrderExemplars++ + } + // Since exemplar storage is still experimental, we don't fail or check other errors. + // Debug log is emitted in TSDB already. + } + if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { + // Only report out of order exemplars if all are out of order, otherwise this was a partial update + // to some existing set of exemplars. 
+ appErrs.numExemplarOutOfOrder += outOfOrderExemplars + sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) + } + return true, nil + } return false, err } } diff --git a/scrape/scrape_append_v2.go b/scrape/scrape_append_v2.go new file mode 100644 index 0000000000..64969707e1 --- /dev/null +++ b/scrape/scrape_append_v2.go @@ -0,0 +1,416 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scrape + +import ( + "errors" + "fmt" + "io" + "math" + "slices" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/textparse" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/storage" +) + +// appenderWithLimits returns an appender with additional validation. +func appenderV2WithLimits(app storage.AppenderV2, sampleLimit, bucketLimit int, maxSchema int32) storage.AppenderV2 { + app = &timeLimitAppenderV2{ + AppenderV2: app, + maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), + } + + // The sampleLimit is applied after metrics are potentially dropped via relabeling. 
+ if sampleLimit > 0 { + app = &limitAppenderV2{ + AppenderV2: app, + limit: sampleLimit, + } + } + + if bucketLimit > 0 { + app = &bucketLimitAppenderV2{ + AppenderV2: app, + limit: bucketLimit, + } + } + + if maxSchema < histogram.ExponentialSchemaMax { + app = &maxSchemaAppenderV2{ + AppenderV2: app, + maxSchema: maxSchema, + } + } + + return app +} + +func (sl *scrapeLoop) updateStaleMarkersV2(app storage.AppenderV2, defTime int64) (err error) { + sl.cache.forEachStale(func(ref storage.SeriesRef, lset labels.Labels) bool { + // Series no longer exposed, mark it stale. + _, err = app.Append(ref, lset, 0, defTime, math.Float64frombits(value.StaleNaN), nil, nil, storage.AOptions{RejectOutOfOrder: true}) + switch { + case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + // Do not count these in logging, as this is expected if a target + // goes away and comes back again with a new scrape loop. + err = nil + } + return err == nil + }) + return err +} + +type scrapeLoopAppenderV2 struct { + *scrapeLoop + + storage.AppenderV2 +} + +var _ scrapeLoopAppendAdapter = &scrapeLoopAppenderV2{} + +func (sl *scrapeLoopAppenderV2) append(b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { + defTime := timestamp.FromTime(ts) + + if len(b) == 0 { + // Empty scrape. Just update the stale makers and swap the cache (but don't flush it). 
+ err = sl.updateStaleMarkersV2(sl.AppenderV2, defTime) + sl.cache.iterDone(false) + return total, added, seriesAdded, err + } + + p, err := textparse.New(b, contentType, sl.symbolTable, textparse.ParserOptions{ + EnableTypeAndUnitLabels: sl.enableTypeAndUnitLabels, + IgnoreNativeHistograms: !sl.enableNativeHistogramScraping, + ConvertClassicHistogramsToNHCB: sl.convertClassicHistToNHCB, + KeepClassicOnClassicAndNativeHistograms: sl.alwaysScrapeClassicHist, + OpenMetricsSkipSTSeries: sl.enableSTZeroIngestion, + FallbackContentType: sl.fallbackScrapeProtocol, + }) + if p == nil { + sl.l.Error( + "Failed to determine correct type of scrape target.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + return total, added, seriesAdded, err + } + if err != nil { + sl.l.Debug( + "Invalid content type on scrape, using fallback setting.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + } + var ( + appErrs = appendErrors{} + sampleLimitErr error + bucketLimitErr error + lset labels.Labels // Escapes to heap so hoisted out of loop. + e exemplar.Exemplar // Escapes to heap so hoisted out of loop. + lastMeta *metaEntry + lastMFName []byte + ) + + exemplars := make([]exemplar.Exemplar, 0, 1) + + // Take an appender with limits. + app := appenderV2WithLimits(sl.AppenderV2, sl.sampleLimit, sl.bucketLimit, sl.maxSchema) + + defer func() { + if err != nil { + return + } + // Flush and swap the cache as the scrape was non-empty. 
+ sl.cache.iterDone(true) + }() + +loop: + for { + var ( + et textparse.Entry + sampleAdded, isHistogram bool + met []byte + parsedTimestamp *int64 + val float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + ) + if et, err = p.Next(); err != nil { + if errors.Is(err, io.EOF) { + err = nil + } + break + } + switch et { + // TODO(bwplotka): Consider changing parser to give metadata at once instead of type, help and unit in separation, ideally on `Series()/Histogram() + // otherwise we can expose metadata without series on metadata API. + case textparse.EntryType: + // TODO(bwplotka): Build meta entry directly instead of locking and updating the map. This will + // allow to properly update metadata when e.g unit was added, then removed; + lastMFName, lastMeta = sl.cache.setType(p.Type()) + continue + case textparse.EntryHelp: + lastMFName, lastMeta = sl.cache.setHelp(p.Help()) + continue + case textparse.EntryUnit: + lastMFName, lastMeta = sl.cache.setUnit(p.Unit()) + continue + case textparse.EntryComment: + continue + case textparse.EntryHistogram: + isHistogram = true + default: + } + total++ + + t := defTime + if isHistogram { + met, parsedTimestamp, h, fh = p.Histogram() + } else { + met, parsedTimestamp, val = p.Series() + } + if !sl.honorTimestamps { + parsedTimestamp = nil + } + if parsedTimestamp != nil { + t = *parsedTimestamp + } + + if sl.cache.getDropped(met) { + continue + } + ce, seriesCached, seriesAlreadyScraped := sl.cache.get(met) + var ( + ref storage.SeriesRef + hash uint64 + ) + + if seriesCached { + ref = ce.ref + lset = ce.lset + hash = ce.hash + } else { + p.Labels(&lset) + hash = lset.Hash() + + // Hash label set as it is seen local to the target. Then add target labels + // and relabeling and store the final label set. + lset = sl.sampleMutator(lset) + + // The label set may be set to empty to indicate dropping. 
+ if lset.IsEmpty() { + sl.cache.addDropped(met) + continue + } + + if !lset.Has(model.MetricNameLabel) { + err = errNameLabelMandatory + break loop + } + if !lset.IsValid(sl.validationScheme) { + err = fmt.Errorf("invalid metric name or label names: %s", lset.String()) + break loop + } + + // If any label limits is exceeded the scrape should fail. + if err = verifyLabelLimits(lset, sl.labelLimits); err != nil { + sl.metrics.targetScrapePoolExceededLabelLimits.Inc() + break loop + } + } + + exemplars = exemplars[:0] // Reset and reuse the exemplar slice. + + if seriesAlreadyScraped && parsedTimestamp == nil { + err = storage.ErrDuplicateSampleForTimestamp + } else { + // Double check we don't append float 0 for + // histogram case where parser returns bad data. + // This can only happen when parser has a bug. + if isHistogram && h == nil && fh == nil { + err = fmt.Errorf("parser returned nil histogram/float histogram for a histogram entry type for %v series; parser bug; aborting", lset.String()) + break loop + } + + st := int64(0) + if sl.enableSTZeroIngestion { + // p.StartTimestamp() tend to be expensive (e.g. OM1). Do it only if we care. + st = p.StartTimestamp() + } + + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + if !e.HasTs { + if isHistogram { + // We drop exemplars for native histograms if they don't have a timestamp. + // Missing timestamps are deliberately not supported as we want to start + // enforcing timestamps for exemplars as otherwise proper deduplication + // is inefficient and purely based on heuristics: we cannot distinguish + // between repeated exemplars and new instances with the same values. + // This is done silently without logs as it is not an error but out of spec. + // This does not affect classic histograms so that behaviour is unchanged. + e = exemplar.Exemplar{} // Reset for the next fetch. 
+ continue + } + e.Ts = t + } + exemplars = append(exemplars, e) + e = exemplar.Exemplar{} // Reset for the next fetch. + } + + // Prepare append call. + appOpts := storage.AOptions{} + if len(exemplars) > 0 { + // Sort so that checking for duplicates / out of order is more efficient during validation. + slices.SortFunc(exemplars, exemplar.Compare) + appOpts.Exemplars = exemplars + } + + // Metadata path mimicks the scrape appender V1 flow. Once we remove v2 + // flow we should rename "appendMetadataToWAL" flag to "passMetadata" because for v2 flow + // the metadata storage detail is behind the appendableV2 contract. V2 also means we always pass the metadata, + // we don't check if it changed (that code can be removed). + // + // Long term, we should always attach the metadata without any flag. Unfortunately because of the limitation + // of the TEXT and OpenMetrics 1.0 (hopefully fixed in OpenMetrics 2.0) there are edge cases around unknown + // metadata + suffixes that is expensive (isSeriesPartOfFamily) or in some cases impossible to detect. For this + // reason metadata (appendMetadataToWAL=true) appender V2 flow scrape might taking ~3% more CPU in our benchmarks. + // + // TODO(https://github.com/prometheus/prometheus/issues/17900): Optimize this, notably move this check to parsers that require this (ensuring parser + // interface always yields correct metadata), deliver OpenMetrics 2.0 that removes suffixes. + if sl.appendMetadataToWAL && lastMeta != nil { + // In majority cases we can trust that the current series/histogram is matching the lastMeta and lastMFName. + // However, optional TYPE, etc metadata and broken OM text can break this, detect those cases here. + if !isSeriesPartOfFamily(lset.Get(model.MetricNameLabel), lastMFName, lastMeta.Type) { + lastMeta = nil // Don't pass knowingly broken metadata, now, nor on the next line. + } + if lastMeta != nil { + // Metric family name has the same source as metadata. 
+ appOpts.MetricFamilyName = yoloString(lastMFName) + appOpts.Metadata = lastMeta.Metadata + } + } + + // Append sample to the storage. + ref, err = app.Append(ref, lset, st, t, val, h, fh, appOpts) + } + sampleAdded, err = sl.checkAddError(met, exemplars, err, &sampleLimitErr, &bucketLimitErr, &appErrs) + if err != nil { + if !errors.Is(err, storage.ErrNotFound) { + sl.l.Debug("Unexpected error", "series", string(met), "err", err) + } + break loop + } + if (parsedTimestamp == nil || sl.trackTimestampsStaleness) && ce != nil { + sl.cache.trackStaleness(ce.ref, ce) + } + + // If series wasn't cached (is new, not seen on previous scrape) we need to add it to the scrape cache. + // But we only do this for series that were appended to TSDB without errors. + // If a series was new, but we didn't append it due to sample_limit or other errors then we don't need + // it in the scrape cache because we don't need to emit StaleNaNs for it when it disappears. + if !seriesCached && sampleAdded { + ce = sl.cache.addRef(met, ref, lset, hash) + if ce != nil && (parsedTimestamp == nil || sl.trackTimestampsStaleness) { + // Bypass staleness logic if there is an explicit timestamp. + // But make sure we only do this if we have a cache entry (ce) for our series. + sl.cache.trackStaleness(ref, ce) + } + if sampleLimitErr == nil && bucketLimitErr == nil { + seriesAdded++ + } + } + + // Increment added even if there's an error so we correctly report the + // number of samples remaining after relabeling. + // We still report duplicated samples here since this number should be the exact number + // of time series exposed on a scrape after relabelling. + added++ + } + if sampleLimitErr != nil { + if err == nil { + err = sampleLimitErr + } + // We only want to increment this once per scrape, so this is Inc'd outside the loop. 
+ sl.metrics.targetScrapeSampleLimit.Inc() + } + if bucketLimitErr != nil { + if err == nil { + err = bucketLimitErr // If sample limit is hit, that error takes precedence. + } + // We only want to increment this once per scrape, so this is Inc'd outside the loop. + sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc() + } + if appErrs.numOutOfOrder > 0 { + sl.l.Warn("Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) + } + if appErrs.numDuplicates > 0 { + sl.l.Warn("Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) + } + if appErrs.numOutOfBounds > 0 { + sl.l.Warn("Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) + } + if appErrs.numExemplarOutOfOrder > 0 { + sl.l.Warn("Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) + } + if err == nil { + err = sl.updateStaleMarkersV2(app, defTime) + } + return total, added, seriesAdded, err +} + +func (sl *scrapeLoopAppenderV2) addReportSample(s reportSample, t int64, v float64, b *labels.Builder, rejectOOO bool) (err error) { + ce, ok, _ := sl.cache.get(s.name) + var ref storage.SeriesRef + var lset labels.Labels + if ok { + ref = ce.ref + lset = ce.lset + } else { + // The constants are suffixed with the invalid \xff unicode rune to avoid collisions + // with scraped metrics in the cache. + // We have to drop it when building the actual metric. 
+ b.Reset(labels.EmptyLabels()) + b.Set(model.MetricNameLabel, string(s.name[:len(s.name)-1])) + lset = sl.reportSampleMutator(b.Labels()) + } + + ref, err = sl.Append(ref, lset, 0, t, v, nil, nil, storage.AOptions{ + MetricFamilyName: yoloString(s.name), + Metadata: s.Metadata, + RejectOutOfOrder: rejectOOO, + }) + switch { + case err == nil: + if !ok { + sl.cache.addRef(s.name, ref, lset, lset.Hash()) + } + return nil + case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + // Do not log here, as this is expected if a target goes away and comes back + // again with a new scrape loop. + return nil + default: + return err + } +} diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 07d9f66266..9c12a31ab3 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -36,8 +36,6 @@ import ( "time" "github.com/gogo/protobuf/proto" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" @@ -65,6 +63,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/pool" "github.com/prometheus/prometheus/util/teststorage" @@ -88,42 +87,67 @@ func newTestScrapeMetrics(t testing.TB) *scrapeMetrics { } func TestNewScrapePool(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testNewScrapePool(t, appV2) + }) +} + +func testNewScrapePool(t *testing.T, appV2 bool) { var ( app = teststorage.NewAppendable() + sa = selectAppendable(app, appV2) cfg = &config.ScrapeConfig{ MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - sp, err = newScrapePool(cfg, app, 0, nil, 
nil, &Options{}, newTestScrapeMetrics(t)) + sp, err = newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) ) require.NoError(t, err) + if appV2 { + a, ok := sp.appendableV2.(*teststorage.Appendable) + require.True(t, ok, "Failure to append.") + require.Equal(t, app, a, "Wrong sample AppenderV2.") + require.Equal(t, cfg, sp.config, "Wrong scrape config.") + + require.Nil(t, sp.appendable) + return + } a, ok := sp.appendable.(*teststorage.Appendable) require.True(t, ok, "Failure to append.") require.Equal(t, app, a, "Wrong sample appender.") require.Equal(t, cfg, sp.config, "Wrong scrape config.") + + require.Nil(t, sp.appendableV2) } func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testStorageHandlesOutOfOrderTimestamps(t, appV2) + }) +} + +func testStorageHandlesOutOfOrderTimestamps(t *testing.T, appV2 bool) { // Test with default OutOfOrderTimeWindow (0) t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - - runScrapeLoopTest(t, s, false) + runScrapeLoopTest(t, appV2, s, false) }) // Test with specific OutOfOrderTimeWindow (600000) t.Run("Out-Of-Order Sample Enabled", func(t *testing.T) { - s := teststorage.New(t, 600000) + s := teststorage.New(t, func(opt *tsdb.Options) { + opt.OutOfOrderTimeWindow = 600000 + }) t.Cleanup(func() { _ = s.Close() }) - runScrapeLoopTest(t, s, true) + runScrapeLoopTest(t, appV2, s, true) }) } -func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { - sl, _ := newTestScrapeLoop(t, withAppendable(s)) +func runScrapeLoopTest(t *testing.T, appV2 bool, s *teststorage.TestStorage, expectOutOfOrder bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(s, appV2)) // Current time for generating timestamps. 
now := time.Now() @@ -184,14 +208,20 @@ func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrde } if expectOutOfOrder { - require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + teststorage.RequireNotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) } else { - require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + teststorage.RequireEqual(t, want, results, "Appended samples not as expected:\n%s", results) } } // Regression test against https://github.com/prometheus/prometheus/issues/15831. func TestScrapeAppend_MetadataUpdate(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeAppendMetadataUpdate(t, appV2) + }) +} + +func testScrapeAppendMetadataUpdate(t *testing.T, appV2 bool) { const ( scrape1 = `# TYPE test_metric counter # HELP test_metric some help text @@ -215,32 +245,40 @@ test_metric2{foo="bar"} 22 ) appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() _, _, _, err := app.append([]byte(scrape1), "application/openmetrics-text", now) require.NoError(t, err) require.NoError(t, app.Commit()) - testutil.RequireEqual(t, []sample{ + teststorage.RequireEqual(t, []sample{ {L: labels.FromStrings("__name__", "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, {L: labels.FromStrings("__name__", "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}}, }, appTest.ResultMetadata()) appTest.ResultReset() - // Next (the same) scrape should not new metadata entries. 
app = sl.appender() _, _, _, err = app.append([]byte(scrape1), "application/openmetrics-text", now.Add(15*time.Second)) require.NoError(t, err) require.NoError(t, app.Commit()) - require.Empty(t, appTest.ResultMetadata()) + if appV2 { + // Next (the same) scrape should pass new metadata entries as per always-on metadata Appendable V2 contract. + teststorage.RequireEqual(t, []sample{ + {L: labels.FromStrings("__name__", "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings("__name__", "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}}, + }, appTest.ResultMetadata()) + } else { + // Next (the same) scrape should not add new metadata entries. + require.Empty(t, appTest.ResultMetadata()) + } appTest.ResultReset() app = sl.appender() _, _, _, err = app.append([]byte(scrape2), "application/openmetrics-text", now.Add(15*time.Second)) require.NoError(t, err) require.NoError(t, app.Commit()) - testutil.RequireEqual(t, []sample{ + teststorage.RequireEqual(t, []sample{ {L: labels.FromStrings("__name__", "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "different help text"}}, // Here, technically we should have no unit, but it's a known limitation of the current implementation. 
{L: labels.FromStrings("__name__", "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "metric2", Help: "other help text"}}, }, appTest.ResultMetadata()) @@ -248,14 +286,20 @@ test_metric2{foo="bar"} 22 } func TestScrapeReportMetadata(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeReportMetadata(t, appV2) + }) +} + +func testScrapeReportMetadata(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) app := sl.appender() now := time.Now() require.NoError(t, sl.report(app, now, 2*time.Second, 1, 1, 1, 512, nil)) require.NoError(t, app.Commit()) - testutil.RequireEqual(t, []sample{ + teststorage.RequireEqual(t, []sample{ {L: labels.FromStrings("__name__", "up"), M: scrapeHealthMetric.Metadata}, {L: labels.FromStrings("__name__", "scrape_duration_seconds"), M: scrapeDurationMetric.Metadata}, {L: labels.FromStrings("__name__", "scrape_samples_scraped"), M: scrapeSamplesMetric.Metadata}, @@ -313,6 +357,12 @@ func TestIsSeriesPartOfFamily(t *testing.T) { } func TestDroppedTargetsList(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testDroppedTargetsList(t, appV2) + }) +} + +func testDroppedTargetsList(t *testing.T, appV2 bool) { var ( app = teststorage.NewAppendable() cfg = &config.ScrapeConfig{ @@ -337,7 +387,8 @@ func TestDroppedTargetsList(t *testing.T) { }, }, } - sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa = selectAppendable(app, appV2) + sp, _ = newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}" expectedLength = 2 ) @@ -358,7 +409,7 @@ func TestDroppedTargetsList(t *testing.T) { // TestDiscoveredLabelsUpdate checks that DiscoveredLabels are 
updated // even when new labels don't affect the target `hash`. func TestDiscoveredLabelsUpdate(t *testing.T) { - sp := newTestScrapePool(t, nil) + sp := newTestScrapePool(t, nil, false, nil) // These are used when syncing so need this to avoid a panic. sp.config = &config.ScrapeConfig{ @@ -430,7 +481,7 @@ func (*testLoop) getCache() *scrapeCache { func TestScrapePoolStop(t *testing.T) { t.Parallel() - sp := newTestScrapePool(t, nil) + sp := newTestScrapePool(t, nil, false, nil) var mtx sync.Mutex stopped := map[uint64]bool{} @@ -530,7 +581,7 @@ func TestScrapePoolReload(t *testing.T) { // Create test pool. reg, metrics := newTestRegistryAndScrapeMetrics(t) - sp := newTestScrapePool(t, newLoopCfg1) + sp := newTestScrapePool(t, nil, false, newLoopCfg1) sp.metrics = metrics // Prefill pool with 20 loops, simulating 20 scrape targets. @@ -592,7 +643,7 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { return l } reg, metrics := newTestRegistryAndScrapeMetrics(t) - sp := newTestScrapePool(t, newLoop) + sp := newTestScrapePool(t, nil, false, newLoop) sp.activeTargets[1] = &Target{ labels: labels.FromStrings(model.ScrapeIntervalLabel, "5s", model.ScrapeTimeoutLabel, "3s"), } @@ -644,7 +695,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { return l } - sp := newTestScrapePool(t, newLoop) + sp := newTestScrapePool(t, nil, false, newLoop) var tgs []*targetgroup.Group for i := range 50 { @@ -756,7 +807,9 @@ func TestScrapePoolAppenderWithLimits(t *testing.T) { baseAppender := struct{ storage.Appender }{} appendable := appendableFunc(func(context.Context) storage.Appender { return baseAppender }) - sl, _ := newTestScrapeLoop(t, withAppendable(appendable)) + sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendable = appendable + }) wrapped := appenderWithLimits(sl.appendable.Appender(context.Background()), 0, 0, histogram.ExponentialSchemaMax) tl, ok := wrapped.(*timeLimitAppender) @@ -809,7 +862,77 @@ func 
TestScrapePoolAppenderWithLimits(t *testing.T) { require.Equal(t, baseAppender, tl.Appender, "Expected base appender but got %T", tl.Appender) } +type appendableV2Func func(ctx context.Context) storage.AppenderV2 + +func (a appendableV2Func) AppenderV2(ctx context.Context) storage.AppenderV2 { return a(ctx) } + +func TestScrapePoolAppenderWithLimits_AppendV2(t *testing.T) { + // Create a unique value, to validate the correct chain of appenders. + baseAppender := struct{ storage.AppenderV2 }{} + appendable := appendableV2Func(func(context.Context) storage.AppenderV2 { return baseAppender }) + + sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = appendable + }) + wrapped := appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), 0, 0, histogram.ExponentialSchemaMax) + + tl, ok := wrapped.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", wrapped) + + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) + + sampleLimit := 100 + sl, _ = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl.appendableV2 = appendable + sl.sampleLimit = sampleLimit + }) + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 0, histogram.ExponentialSchemaMax) + + la, ok := wrapped.(*limitAppenderV2) + require.True(t, ok, "Expected limitAppenderV2 but got %T", wrapped) + + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) + + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) + + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 100, histogram.ExponentialSchemaMax) + + bl, ok := wrapped.(*bucketLimitAppenderV2) + require.True(t, ok, "Expected bucketLimitAppenderV2 but got %T", wrapped) + + la, ok = bl.AppenderV2.(*limitAppenderV2) + require.True(t, ok, "Expected 
limitAppenderV2 but got %T", bl) + + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) + + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) + + wrapped = appenderV2WithLimits(sl.appendableV2.AppenderV2(context.Background()), sampleLimit, 100, 0) + + ml, ok := wrapped.(*maxSchemaAppenderV2) + require.True(t, ok, "Expected maxSchemaAppenderV2 but got %T", wrapped) + + bl, ok = ml.AppenderV2.(*bucketLimitAppenderV2) + require.True(t, ok, "Expected bucketLimitAppenderV2 but got %T", wrapped) + + la, ok = bl.AppenderV2.(*limitAppenderV2) + require.True(t, ok, "Expected limitAppenderV2 but got %T", bl) + + tl, ok = la.AppenderV2.(*timeLimitAppenderV2) + require.True(t, ok, "Expected timeLimitAppenderV2 but got %T", la.AppenderV2) + + require.Equal(t, baseAppender, tl.AppenderV2, "Expected base AppenderV2 but got %T", tl.AppenderV2) +} + func TestScrapePoolRaces(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapePoolRaces(t, appV2) + }) +} + +func testScrapePoolRaces(t *testing.T, appV2 bool) { t.Parallel() interval, _ := model.ParseDuration("1s") timeout, _ := model.ParseDuration("500ms") @@ -821,7 +944,8 @@ func TestScrapePoolRaces(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } } - sp, _ := newScrapePool(newConfig(), teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(teststorage.NewAppendable(), appV2) + sp, _ := newScrapePool(newConfig(), sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{ @@ -853,6 +977,12 @@ func TestScrapePoolRaces(t *testing.T) { } func TestScrapePoolScrapeLoopsStarted(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapePoolScrapeLoopsStarted(t, appV2) + }) +} + +func testScrapePoolScrapeLoopsStarted(t *testing.T, appV2 
bool) { var wg sync.WaitGroup newLoop := func(scrapeLoopOptions) loop { wg.Add(1) @@ -864,7 +994,7 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) { } return l } - sp := newTestScrapePool(t, newLoop) + sp := newTestScrapePool(t, teststorage.NewAppendable(), appV2, newLoop) tgs := []*targetgroup.Group{ { @@ -946,11 +1076,16 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) { func nopMutator(l labels.Labels) labels.Labels { return l } func TestScrapeLoopStop(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopStop(t, appV2) + }) +} + +func testScrapeLoopStop(t *testing.T, appV2 bool) { signal := make(chan struct{}, 1) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" }) @@ -1000,6 +1135,12 @@ func TestScrapeLoopStop(t *testing.T) { } func TestScrapeLoopRun(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRun(t, appV2) + }) +} + +func testScrapeLoopRun(t *testing.T, appV2 bool) { t.Parallel() var ( signal = make(chan struct{}, 1) @@ -1007,7 +1148,7 @@ func TestScrapeLoopRun(t *testing.T) { ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable(), appV2)) // The loop must terminate during the initial offset if the context // is canceled. 
scraper.offsetDur = time.Hour @@ -1030,7 +1171,7 @@ func TestScrapeLoopRun(t *testing.T) { } ctx, cancel = context.WithCancel(t.Context()) - sl, scraper = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper = newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.ctx = ctx sl.timeout = 100 * time.Millisecond }) @@ -1076,13 +1217,19 @@ func TestScrapeLoopRun(t *testing.T) { } func TestScrapeLoopForcedErr(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopForcedErr(t, appV2) + }) +} + +func testScrapeLoopForcedErr(t *testing.T, appV2 bool) { var ( signal = make(chan struct{}, 1) errc = make(chan error) ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable(), appV2)) forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) @@ -1113,6 +1260,12 @@ func TestScrapeLoopForcedErr(t *testing.T) { } func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunContextCancelTerminatesBlockedSend(t, appV2) + }) +} + +func testScrapeLoopRunContextCancelTerminatesBlockedSend(t *testing.T, appV2 bool) { // Regression test for issue #17553 defer goleak.VerifyNone(t) @@ -1122,7 +1275,7 @@ func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { ) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, withCtx(ctx)) + sl, scraper := newTestScrapeLoop(t, withCtx(ctx), withAppendable(teststorage.NewAppendable(), appV2)) forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) @@ -1149,7 +1302,13 @@ func TestScrapeLoopRun_ContextCancelTerminatesBlockedSend(t *testing.T) { } func TestScrapeLoopMetadata(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + foreachAppendable(t, func(t *testing.T, appV2 bool) { + 
testScrapeLoopMetadata(t, appV2) + }) +} + +func testScrapeLoopMetadata(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2)) app := sl.appender() total, _, _, err := app.append([]byte(`# TYPE test_metric counter @@ -1183,7 +1342,13 @@ test_metric_total 1 } func TestScrapeLoopSeriesAdded(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopSeriesAdded(t, appV2) + }) +} + +func testScrapeLoopSeriesAdded(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2)) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\n"), "text/plain", time.Time{}) @@ -1203,6 +1368,12 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { } func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopFailWithInvalidLabelsAfterRelabel(t, appV2) + }) +} + +func testScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T, appV2 bool) { target := &Target{ labels: labels.FromStrings("pod_label_invalid_012\xff", "test"), } @@ -1213,7 +1384,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { Replacement: "$1", NameValidationScheme: model.UTF8Validation, }} - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } @@ -1229,7 +1400,13 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { } func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopFailLegacyUnderUTF8(t, appV2) + }) +} + +func testScrapeLoopFailLegacyUnderUTF8(t 
*testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.validationScheme = model.LegacyValidation }) @@ -1242,7 +1419,7 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { require.Equal(t, 0, seriesAdded) // When scrapeloop has validation set to UTF-8, the metric is allowed. - sl, _ = newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, _ = newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.validationScheme = model.UTF8Validation }) @@ -1275,13 +1452,50 @@ func makeTestGauges(n int) []byte { return sb.Bytes() } +func makeTestHistogramsWithExemplars(n int) []byte { + sb := bytes.Buffer{} + for i := range n { + sb.WriteString(strings.ReplaceAll(`# HELP rpc_durations_histogram%d_seconds RPC latency distributions. +# TYPE rpc_durations_histogram%d_seconds histogram +rpc_durations_histogram%d_seconds_bucket{le="-0.00099"} 0 +rpc_durations_histogram%d_seconds_bucket{le="-0.00089"} 1 # {dummyID="1242"} -0.00091 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0007899999999999999"} 1 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0006899999999999999"} 2 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0005899999999999998"} 3 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0004899999999999998"} 4 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0003899999999999998"} 5 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0002899999999999998"} 6 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram%d_seconds_bucket{le="-0.0001899999999999998"} 7 # {dummyID="84741"} 
-0.00020178290006788965 1.726839814829977e+09 +rpc_durations_histogram%d_seconds_bucket{le="-8.999999999999979e-05"} 7 +rpc_durations_histogram%d_seconds_bucket{le="1.0000000000000216e-05"} 8 # {dummyID="19206"} -4.6156147425468016e-05 1.7268398151337721e+09 +rpc_durations_histogram%d_seconds_bucket{le="0.00011000000000000022"} 9 # {dummyID="3974"} 9.528436760156754e-05 1.726839814526797e+09 +rpc_durations_histogram%d_seconds_bucket{le="0.00021000000000000023"} 11 # {dummyID="29640"} 0.00017459624183458996 1.7268398139220061e+09 +rpc_durations_histogram%d_seconds_bucket{le="0.0003100000000000002"} 15 # {dummyID="9818"} 0.0002791130914009552 1.7268398149821382e+09 +rpc_durations_histogram%d_seconds_bucket{le="0.0004100000000000002"} 15 +rpc_durations_histogram%d_seconds_bucket{le="0.0005100000000000003"} 15 +rpc_durations_histogram%d_seconds_bucket{le="0.0006100000000000003"} 15 +rpc_durations_histogram%d_seconds_bucket{le="0.0007100000000000003"} 15 +rpc_durations_histogram%d_seconds_bucket{le="0.0008100000000000004"} 15 +rpc_durations_histogram%d_seconds_bucket{le="0.0009100000000000004"} 15 +rpc_durations_histogram%d_seconds_bucket{le="+Inf"} 15 +rpc_durations_histogram%d_seconds_sum -8.452185437166741e-05 +rpc_durations_histogram%d_seconds_count 15 +rpc_durations_histogram%d_seconds_created 1.726839813016302e+09 +`, "%d", strconv.Itoa(i))) + } + sb.WriteString("# EOF\n") + return sb.Bytes() +} + +// promTextToProto converts Prometheus text to proto. +// Given expfmt decoding limitations, it does not support OpenMetrics fully (e.g. exemplars). func promTextToProto(tb testing.TB, text []byte) []byte { tb.Helper() p := expfmt.NewTextParser(model.UTF8Validation) fams, err := p.TextToMetricFamilies(bytes.NewReader(text)) if err != nil { - tb.Fatal(err) + tb.Fatal("TextToMetricFamilies:", err) } // Order by name for the deterministic tests. 
var names []string @@ -1307,8 +1521,7 @@ func promTextToProto(tb testing.TB, text []byte) []byte { func TestPromTextToProto(t *testing.T) { metricsText := readTextParseTestMetrics(t) - // TODO(bwplotka): Windows adds \r for new lines which is - // not handled correctly in the expfmt parser, fix it. + // On windows \r is added when reading, but parsers do not support this. Kill it. metricsText = bytes.ReplaceAll(metricsText, []byte("\r"), nil) metricsProto := promTextToProto(t, metricsText) @@ -1332,9 +1545,11 @@ func TestPromTextToProto(t *testing.T) { require.Equal(t, "promhttp_metric_handler_requests_total", got[236]) } -// BenchmarkScrapeLoopAppend benchmarks a core append function in a scrapeLoop -// that creates a new parser and goes through a byte slice from a single scrape. -// Benchmark compares append function run across 2 dimensions: +// BenchmarkScrapeLoopAppend benchmarks scrape appends for typical cases. +// +// Benchmark compares append function run across 4 dimensions: +// * `appV2`: appender V1 or V2 +// * `appendMetadataToWAL`: metadata-wal-records feature enabled or not // *`data`: different sizes of metrics scraped e.g. one big gauge metric family // with a thousand series and more realistic scenario with common types. // *`fmt`: different scrape formats which will benchmark different parsers e.g. @@ -1343,64 +1558,116 @@ func TestPromTextToProto(t *testing.T) { // Recommended CLI invocation: /* export bench=append && go test ./scrape/... \ - -run '^$' -bench '^BenchmarkScrapeLoopAppend' \ - -benchtime 5s -count 6 -cpu 2 -timeout 999m \ + -run '^$' -bench '^BenchmarkScrapeLoopAppend$' \ + -benchtime 2s -count 6 -cpu 2 -timeout 999m \ | tee ${bench}.txt */ func BenchmarkScrapeLoopAppend(b *testing.B) { - for _, data := range []struct { - name string - parsableText []byte - }{ - {name: "1Fam1000Gauges", parsableText: makeTestGauges(2000)}, // ~68.1 KB, ~77.9 KB in proto. 
- {name: "237FamsAllTypes", parsableText: readTextParseTestMetrics(b)}, // ~185.7 KB, ~70.6 KB in proto. - } { - b.Run(fmt.Sprintf("data=%v", data.name), func(b *testing.B) { - metricsProto := promTextToProto(b, data.parsableText) - - for _, bcase := range []struct { - name string - contentType string - parsable []byte + for _, appV2 := range []bool{false, true} { + for _, appendMetadataToWAL := range []bool{false, true} { + for _, data := range []struct { + name string + parsableText []byte }{ - {name: "PromText", contentType: "text/plain", parsable: data.parsableText}, - {name: "OMText", contentType: "application/openmetrics-text", parsable: data.parsableText}, - {name: "PromProto", contentType: "application/vnd.google.protobuf", parsable: metricsProto}, + {name: "1Fam1000Gauges", parsableText: makeTestGauges(2000)}, // ~68.1 KB, ~77.9 KB in proto. + {name: "237FamsAllTypes", parsableText: readTextParseTestMetrics(b)}, // ~185.7 KB, ~70.6 KB in proto. } { - b.Run(fmt.Sprintf("fmt=%v", bcase.name), func(b *testing.B) { - // Need a full storage for correct Add/AddFast semantics. - s := teststorage.New(b) - b.Cleanup(func() { _ = s.Close() }) + b.Run(fmt.Sprintf("appV2=%v/appendMetadataToWAL=%v/data=%v", appV2, appendMetadataToWAL, data.name), func(b *testing.B) { + metricsProto := promTextToProto(b, data.parsableText) - sl, _ := newTestScrapeLoop(b, withAppendable(s)) - app := sl.appender() - ts := time.Time{} - - b.ReportAllocs() - b.ResetTimer() - for b.Loop() { - ts = ts.Add(time.Second) - _, _, _, err := app.append(bcase.parsable, bcase.contentType, ts) - if err != nil { - b.Fatal(err) - } - app.Rollback() // Reset the appender so it doesn't grow indefinitely. 
- app = sl.appender() + for _, bcase := range []struct { + name string + contentType string + parsable []byte + }{ + {name: "PromText", contentType: "text/plain", parsable: data.parsableText}, + {name: "OMText", contentType: "application/openmetrics-text", parsable: data.parsableText}, + {name: "PromProto", contentType: "application/vnd.google.protobuf", parsable: metricsProto}, + } { + b.Run(fmt.Sprintf("fmt=%v", bcase.name), func(b *testing.B) { + benchScrapeLoopAppend(b, appV2, bcase.parsable, bcase.contentType, appendMetadataToWAL, false) + }) } }) } + } + } +} + +func benchScrapeLoopAppend( + b *testing.B, + appV2 bool, + parsable []byte, + contentType string, + appendMetadataToWAL bool, + enableExemplarStorage bool, +) { + // Need a full storage for correct Add/AddFast semantics. + s := teststorage.New(b, func(opt *tsdb.Options) { + opt.EnableMetadataWALRecords = appendMetadataToWAL + if enableExemplarStorage { + opt.EnableExemplarStorage = true + opt.MaxExemplars = 1e5 + } + }) + b.Cleanup(func() { _ = s.Close() }) + + sl, _ := newTestScrapeLoop(b, withAppendable(s, appV2), func(sl *scrapeLoop) { + sl.appendMetadataToWAL = appendMetadataToWAL + }) + app := sl.appender() + ts := time.Time{} + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + ts = ts.Add(time.Second) + _, _, _, err := app.append(parsable, contentType, ts) + if err != nil { + b.Fatal(err) + } + // Reset the appender so it doesn't grow indefinitely, and it mimics what prod scrape will do. + // We do rollback, because it's cheaper than Commit. + if err := app.Rollback(); err != nil { + b.Fatal(err) + } + app = sl.appender() + } +} + +// BenchmarkScrapeLoopAppend_HistogramsWithExemplars benchmarks OM scrapes with histograms full of exemplars. +// +// For e2e TSDB impact, we enable the TSDB exemplar storage +// +// Recommended CLI invocation: +/* + export bench=appendHistWithExemplars && go test ./scrape/... 
\ + -run '^$' -bench '^BenchmarkScrapeLoopAppend_HistogramsWithExemplars' \ + -benchtime 5s -count 6 -cpu 2 -timeout 999m \ + | tee ${bench}.txt +*/ +func BenchmarkScrapeLoopAppend_HistogramsWithExemplars(b *testing.B) { + for _, appV2 := range []bool{false, true} { + b.Run(fmt.Sprintf("appV2=%v", appV2), func(b *testing.B) { + parsable := makeTestHistogramsWithExemplars(100) // ~255.8 KB in OM text. + benchScrapeLoopAppend(b, appV2, parsable, "application/openmetrics-text", false, true) }) } } func TestScrapeLoopScrapeAndReport(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopScrapeAndReport(t, appV2) + }) +} + +func testScrapeLoopScrapeAndReport(t *testing.T, appV2 bool) { parsableText := readTextParseTestMetrics(t) // On windows \r is added when reading, but parsers do not support this. Kill it. parsableText = bytes.ReplaceAll(parsableText, []byte("\r"), nil) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.fallbackScrapeProtocol = "application/openmetrics-text" }) scraper.scrapeFunc = func(_ context.Context, writer io.Writer) error { @@ -1420,38 +1687,49 @@ func TestScrapeLoopScrapeAndReport(t *testing.T) { // Recommended CLI invocation: /* export bench=scrapeAndReport && go test ./scrape/... 
\ - -run '^$' -bench '^BenchmarkScrapeLoopScrapeAndReport' \ + -run '^$' -bench '^BenchmarkScrapeLoopScrapeAndReport$' \ -benchtime 5s -count 6 -cpu 2 -timeout 999m \ | tee ${bench}.txt */ func BenchmarkScrapeLoopScrapeAndReport(b *testing.B) { - parsableText := readTextParseTestMetrics(b) + for _, appV2 := range []bool{false, true} { + b.Run(fmt.Sprintf("appV2=%v", appV2), func(b *testing.B) { + parsableText := readTextParseTestMetrics(b) - s := teststorage.New(b) - b.Cleanup(func() { _ = s.Close() }) + s := teststorage.New(b) + b.Cleanup(func() { _ = s.Close() }) - sl, scraper := newTestScrapeLoop(b, func(sl *scrapeLoop) { - sl.appendable = s - sl.fallbackScrapeProtocol = "application/openmetrics-text" - }) - scraper.scrapeFunc = func(_ context.Context, writer io.Writer) error { - _, err := writer.Write(parsableText) - return err - } + sl, scraper := newTestScrapeLoop(b, withAppendable(s, appV2), func(sl *scrapeLoop) { + sl.fallbackScrapeProtocol = "application/openmetrics-text" + }) + scraper.scrapeFunc = func(_ context.Context, writer io.Writer) error { + _, err := writer.Write(parsableText) + return err + } - ts := time.Time{} + ts := time.Time{} - b.ReportAllocs() - b.ResetTimer() - for b.Loop() { - ts = ts.Add(time.Second) - sl.scrapeAndReport(time.Time{}, ts, nil) - require.NoError(b, scraper.lastError) + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + ts = ts.Add(time.Second) + sl.scrapeAndReport(time.Time{}, ts, nil) + require.NoError(b, scraper.lastError) + } + }) } } func TestSetOptionsHandlingStaleness(t *testing.T) { - s := teststorage.New(t, 600000) + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testSetOptionsHandlingStaleness(t, appV2) + }) +} + +func testSetOptionsHandlingStaleness(t *testing.T, appV2 bool) { + s := teststorage.New(t, func(opt *tsdb.Options) { + opt.OutOfOrderTimeWindow = 600000 + }) t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) @@ -1460,9 +1738,8 @@ func TestSetOptionsHandlingStaleness(t 
*testing.T) { // Function to run the scrape loop runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = s }) numScrapes := 0 @@ -1526,17 +1803,22 @@ func TestSetOptionsHandlingStaleness(t *testing.T) { c++ } } - require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") + require.Zero(t, c, "invalid count of staleness markers after stopping the engine") } func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t, appV2) + }) +} + +func testScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T, appV2 bool) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. 
sl.fallbackScrapeProtocol = "text/plain" }) @@ -1578,13 +1860,18 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { } func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunCreatesStaleMarkersOnParseFailure(t, appV2) + }) +} + +func testScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T, appV2 bool) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" }) @@ -1632,13 +1919,18 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { // If we have a target with sample_limit set and scrape initially works, but then we hit the sample_limit error, // then we don't expect to see any StaleNaNs appended for the series that disappeared due to sample_limit error. func TestScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t, appV2) + }) +} + +func testScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t *testing.T, appV2 bool) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. 
sl.fallbackScrapeProtocol = "text/plain" sl.sampleLimit = 4 @@ -1702,6 +1994,12 @@ func TestScrapeLoopRunCreatesStaleMarkersOnSampleLimit(t *testing.T) { } func TestScrapeLoopCache(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopCache(t, appV2) + }) +} + +func testScrapeLoopCache(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -1709,10 +2007,9 @@ func TestScrapeLoopCache(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable().Then(s) - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx sl.l = promslog.New(&promslog.Config{}) - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. sl.fallbackScrapeProtocol = "text/plain" // Decreasing the scrape interval could make the test fail, as multiple scrapes might be initiated at identical millisecond timestamps. 
@@ -1767,13 +2064,19 @@ func TestScrapeLoopCache(t *testing.T) { } func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopCacheMemoryExhaustionProtection(t, appV2) + }) +} + +func testScrapeLoopCacheMemoryExhaustionProtection(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable().Then(s), appV2), func(sl *scrapeLoop) { sl.ctx = ctx }) numScrapes := 0 @@ -1805,138 +2108,124 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { require.LessOrEqual(t, len(sl.cache.series), 2000, "More than 2000 series cached.") } -func TestScrapeLoopAppend(t *testing.T) { - tests := []struct { - title string - honorLabels bool - scrapeLabels string - discoveryLabels []string - expLset labels.Labels - expValue float64 - }{ - { - // When "honor_labels" is not set - // label name collision is handler by adding a prefix. - title: "Label name collision", - honorLabels: false, - scrapeLabels: `metric{n="1"} 0`, - discoveryLabels: []string{"n", "2"}, - expLset: labels.FromStrings("__name__", "metric", "exported_n", "1", "n", "2"), - expValue: 0, - }, { - // When "honor_labels" is not set - // exported label from discovery don't get overwritten - title: "Label name collision", - honorLabels: false, - scrapeLabels: `metric 0`, - discoveryLabels: []string{"n", "2", "exported_n", "2"}, - expLset: labels.FromStrings("__name__", "metric", "n", "2", "exported_n", "2"), - expValue: 0, - }, { - // Labels with no value need to be removed as these should not be ingested. 
- title: "Delete Empty labels", - honorLabels: false, - scrapeLabels: `metric{n=""} 0`, - discoveryLabels: nil, - expLset: labels.FromStrings("__name__", "metric"), - expValue: 0, - }, { - // Honor Labels should ignore labels with the same name. - title: "Honor Labels", - honorLabels: true, - scrapeLabels: `metric{n1="1", n2="2"} 0`, - discoveryLabels: []string{"n1", "0"}, - expLset: labels.FromStrings("__name__", "metric", "n1", "1", "n2", "2"), - expValue: 0, - }, { - title: "Stale - NaN", - honorLabels: false, - scrapeLabels: `metric NaN`, - discoveryLabels: nil, - expLset: labels.FromStrings("__name__", "metric"), - expValue: math.Float64frombits(value.NormalNaN), - }, - } - - for _, test := range tests { - discoveryLabels := &Target{ - labels: labels.FromStrings(test.discoveryLabels...), - } - - appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest - sl.sampleMutator = func(l labels.Labels) labels.Labels { - return mutateSampleLabels(l, discoveryLabels, test.honorLabels, nil) - } - sl.reportSampleMutator = func(l labels.Labels) labels.Labels { - return mutateReportSampleLabels(l, discoveryLabels) - } - }) - - now := time.Now() - - app := sl.appender() - _, _, _, err := app.append([]byte(test.scrapeLabels), "text/plain", now) - require.NoError(t, err) - require.NoError(t, app.Commit()) - - expected := []sample{ - { - L: test.expLset, - T: timestamp.FromTime(now), - V: test.expValue, - }, - } - - t.Logf("Test:%s", test.title) - requireEqual(t, expected, appTest.ResultSamples()) - } +func TestScrapeLoopAppend_HonorLabels(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendHonorLabels(t, appV2) + }) } -func requireEqual(t *testing.T, expected, actual any, msgAndArgs ...any) { - t.Helper() - testutil.RequireEqualWithOptions(t, expected, actual, - []cmp.Option{ - cmp.Comparer(func(a, b sample) bool { return a.Equals(b) }), - // StaleNaN samples are generated 
by iterating over a map, which means that the order - // of samples might be different on every test run. Sort series by label to avoid - // test failures because of that. - cmpopts.SortSlices(func(a, b sample) int { - return labels.Compare(a.L, b.L) - }), +func testScrapeLoopAppendHonorLabels(t *testing.T, appV2 bool) { + for _, test := range []struct { + title string + honorLabels bool + scrapeText string + discoveryLabels []string + expLset labels.Labels + }{ + { + // On label collision, when "honor_labels" is not set, prefix is added. + title: "HonorLabels=false", + scrapeText: `metric{n="1"} 1`, + discoveryLabels: []string{"n", "2"}, + expLset: labels.FromStrings("__name__", "metric", "exported_n", "1", "n", "2"), }, - msgAndArgs...) + { + // Case where SD already has the prefixed label - it shouldn't be overridden. + title: "HonorLabels=false;exported prefix already exists in SD", + scrapeText: `metric{n="1"} 1`, + discoveryLabels: []string{"n", "2", "exported_n", "2"}, + expLset: labels.FromStrings("__name__", "metric", "n", "2", "exported_n", "2", "exported_exported_n", "1"), + }, + { + // Labels with no value need to be removed as these should not be ingested. + title: "HonorLabels=false;empty label", + scrapeText: `metric{n=""} 1`, + discoveryLabels: nil, + expLset: labels.FromStrings("__name__", "metric"), + }, + { + // On label collision, when "honor_labels" is true, label is overridden. 
+ title: "HonorLabels=true", + honorLabels: true, + scrapeText: `metric{n="1"} 1`, + discoveryLabels: []string{"n", "2"}, + expLset: labels.FromStrings("__name__", "metric", "n", "1"), + }, + } { + t.Run(test.title, func(t *testing.T) { + discoveryLabels := &Target{ + labels: labels.FromStrings(test.discoveryLabels...), + } + + appTest := teststorage.NewAppendable() + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, test.honorLabels, nil) + } + sl.reportSampleMutator = func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + } + }) + + now := time.Now() + + app := sl.appender() + _, _, _, err := app.append([]byte(test.scrapeText), "text/plain", now) + require.NoError(t, err) + require.NoError(t, app.Commit()) + + expected := []sample{ + { + L: test.expLset, + T: timestamp.FromTime(now), + V: 1, + }, + } + teststorage.RequireEqual(t, expected, appTest.ResultSamples()) + }) + } } func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { - testcases := map[string]struct { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendForConflictingPrefixedLabels(t, appV2) + }) +} + +func testScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T, appV2 bool) { + for _, tc := range []struct { + name string targetLabels []string exposedLabels string expected []string }{ - "One target label collides with existing label": { + { + name: "One target label collides with existing label", targetLabels: []string{"foo", "2"}, exposedLabels: `metric{foo="1"} 0`, expected: []string{"__name__", "metric", "exported_foo", "1", "foo", "2"}, }, - "One target label collides with existing label, plus target label already with prefix 'exported'": { + { + name: "One target label collides with existing label, plus target label already with prefix 'exported'", targetLabels: 
[]string{"foo", "2", "exported_foo", "3"}, exposedLabels: `metric{foo="1"} 0`, expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "3", "foo", "2"}, }, - "One target label collides with existing label, plus existing label already with prefix 'exported": { + { + name: "One target label collides with existing label, plus existing label already with prefix 'exported'", targetLabels: []string{"foo", "3"}, exposedLabels: `metric{foo="1", exported_foo="2"} 0`, expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2", "foo", "3"}, }, - "One target label collides with existing label, both already with prefix 'exported'": { + { + name: "One target label collides with existing label, both already with prefix 'exported'", targetLabels: []string{"exported_foo", "2"}, exposedLabels: `metric{exported_foo="1"} 0`, expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2"}, }, - "Two target labels collide with existing labels, both with and without prefix 'exported'": { + { + name: "Two target labels collide with existing labels, both with and without prefix 'exported'", targetLabels: []string{"foo", "3", "exported_foo", "4"}, exposedLabels: `metric{foo="1", exported_foo="2"} 0`, expected: []string{ @@ -1944,7 +2233,8 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { "2", "exported_foo", "4", "foo", "3", }, }, - "Extreme example": { + { + name: "Extreme example", targetLabels: []string{"foo", "0", "exported_exported_foo", "1", "exported_exported_exported_foo", "2"}, exposedLabels: `metric{foo="3", exported_foo="4", exported_exported_exported_foo="5"} 0`, expected: []string{ @@ -1957,13 +2247,10 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { "2", "exported_foo", "4", "foo", "3", }, }, - } - - for name, tc := range testcases { - t.Run(name, func(t *testing.T) { + } { + t.Run(tc.name, func(t *testing.T) { appTest := teststorage.NewAppendable() - sl, _ := 
newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil) } @@ -1975,7 +2262,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { require.NoError(t, app.Commit()) - requireEqual(t, []sample{ + teststorage.RequireEqual(t, []sample{ { L: labels.FromStrings(tc.expected...), T: timestamp.FromTime(time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)), @@ -1987,8 +2274,14 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { } func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendCacheEntryButErrNotFound(t, appV2) + }) +} + +func testScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) fakeRef := storage.SeriesRef(1) expValue := float64(1) @@ -2019,8 +2312,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { V: expValue, }, } - - require.Equal(t, expected, appTest.ResultSamples()) + teststorage.RequireEqual(t, expected, appTest.ResultSamples()) } type appendableFunc func(ctx context.Context) storage.Appender @@ -2028,12 +2320,26 @@ type appendableFunc func(ctx context.Context) storage.Appender func (a appendableFunc) Appender(ctx context.Context) storage.Appender { return a(ctx) } func TestScrapeLoopAppendSampleLimit(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendSampleLimit(t, appV2) + }) +} + +func testScrapeLoopAppendSampleLimit(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable 
= appendableFunc(func(ctx context.Context) storage.Appender { - // Chain appTest to verify what samples passed through. - return &limitAppender{Appender: appTest.Appender(ctx), limit: 1} - }) + if appV2 { + sl.appendableV2 = appendableV2Func(func(ctx context.Context) storage.AppenderV2 { + // Chain appTest to verify what samples passed through. + return &limitAppenderV2{AppenderV2: appTest.AppenderV2(ctx), limit: 1} + }) + } else { + sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { + // Chain appTest to verify what samples passed through. + return &limitAppender{Appender: appTest.Appender(ctx), limit: 1} + }) + } + sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("deleteme") { return labels.EmptyLabels() @@ -2077,7 +2383,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { V: 1, }, } - requireEqual(t, want, appTest.RolledbackSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.RolledbackSamples(), "Appended samples not as expected:\n%s", appTest) now = time.Now() app = sl.appender() @@ -2090,10 +2396,23 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { } func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopHistogramBucketLimit(t, appV2) + }) +} + +func testScrapeLoopHistogramBucketLimit(t *testing.T, appV2 bool) { sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { - return &bucketLimitAppender{Appender: teststorage.NewAppendable().Appender(ctx), limit: 2} - }) + if appV2 { + sl.appendableV2 = appendableV2Func(func(ctx context.Context) storage.AppenderV2 { + return &bucketLimitAppenderV2{AppenderV2: teststorage.NewAppendable().AppenderV2(ctx), limit: 2} + }) + } else { + sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { + return &bucketLimitAppender{Appender: 
teststorage.NewAppendable().Appender(ctx), limit: 2} + }) + } + sl.enableNativeHistogramScraping = true sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("deleteme") { @@ -2199,11 +2518,17 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { } func TestScrapeLoop_ChangingMetricString(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopChangingMetricString(t, appV2) + }) +} + +func testScrapeLoopChangingMetricString(t *testing.T, appV2 bool) { // This is a regression test for the scrape loop cache not properly maintaining // IDs when the string representation of a metric changes across a scrape. Thus, // we use a real storage appender here. appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() @@ -2228,11 +2553,17 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { V: 2, }, } - require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendFailsWithNoContentType(t, appV2) + }) +} + +func testScrapeLoopAppendFailsWithNoContentType(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { // Explicitly setting the lack of fallback protocol here to make it obvious. sl.fallbackScrapeProtocol = "" }) @@ -2246,7 +2577,13 @@ func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) { // TestScrapeLoopAppendEmptyWithNoContentType ensures we there are no errors when we get a blank scrape or just want to append a stale marker. 
func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) { - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendEmptyWithNoContentType(t, appV2) + }) +} + +func testScrapeLoopAppendEmptyWithNoContentType(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { // Explicitly setting the lack of fallback protocol here to make it obvious. sl.fallbackScrapeProtocol = "" }) @@ -2259,8 +2596,14 @@ func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) { } func TestScrapeLoopAppendStaleness(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendStaleness(t, appV2) + }) +} + +func testScrapeLoopAppendStaleness(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() @@ -2285,12 +2628,18 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { V: math.Float64frombits(value.StaleNaN), }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendNoStalenessIfTimestamp(t, appV2) + }) +} + +func testScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() _, _, _, err := app.append([]byte("metric_a 1 1000\n"), "text/plain", now) @@ -2309,13 +2658,18 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t 
*testing.T) { V: 1, }, } - require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendStalenessIfTrackTimestampStaleness(t, appV2) + }) +} + +func testScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T, appV2 bool) { appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.trackTimestampsStaleness = true }) @@ -2342,11 +2696,18 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { V: math.Float64frombits(value.StaleNaN), }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } -func TestScrapeLoopAppendExemplar(t *testing.T) { - tests := []struct { +// TestScrapeLoopAppend is the main table test testing the scrape appends, including histograms, exemplar and metadata. 
+func TestScrapeLoopAppend(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppend(t, appV2) + }) +} + +func testScrapeLoopAppend(t *testing.T, appV2 bool) { + for _, test := range []struct { title string alwaysScrapeClassicHist bool enableNativeHistogramsIngestion bool @@ -2355,6 +2716,15 @@ func TestScrapeLoopAppendExemplar(t *testing.T) { discoveryLabels []string samples []sample }{ + { + title: "Normal NaN scraped", + scrapeText: "metric_total{n=\"1\"} NaN\n# EOF", + contentType: "application/openmetrics-text", + samples: []sample{{ + L: labels.FromStrings("__name__", "metric_total", "n", "1"), + V: math.Float64frombits(value.NormalNaN), + }}, + }, { title: "Metric without exemplars", scrapeText: "metric_total{n=\"1\"} 0\n# EOF", @@ -2614,22 +2984,6 @@ metric: < alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", samples: []sample{ - {L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, - {L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, - {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2}, - { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4, - ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}}, - }, - { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16, - ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}}, - }, - { - L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32, - ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}}, - }, - {L: labels.FromStrings("__name__", 
"test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, { T: 1234568, L: labels.FromStrings("__name__", "test_histogram"), @@ -2657,6 +3011,22 @@ metric: < {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, }, }, + {L: labels.FromStrings("__name__", "test_histogram_count"), T: 1234568, V: 175}, + {L: labels.FromStrings("__name__", "test_histogram_sum"), T: 1234568, V: 0.0008280461746287094}, + {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), T: 1234568, V: 2}, + { + L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), T: 1234568, V: 4, + ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}}, + }, + { + L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), T: 1234568, V: 16, + ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}}, + }, + { + L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), T: 1234568, V: 32, + ES: []exemplar.Exemplar{{Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}}, + }, + {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, }, }, { @@ -2838,17 +3208,14 @@ metric: < {L: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), T: 1234568, V: 175}, }, }, - } - - for _, test := range tests { + } { t.Run(test.title, func(t *testing.T) { discoveryLabels := &Target{ labels: labels.FromStrings(test.discoveryLabels...), } appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.enableNativeHistogramScraping = test.enableNativeHistogramsIngestion 
sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) @@ -2857,15 +3224,23 @@ metric: < return mutateReportSampleLabels(l, discoveryLabels) } sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist - // This test does not care about metadata. Having this true would mean we need to add metadata to sample + // This test does not care about metadata. + // Having this true would mean we need to add metadata to sample // expectations. + // TODO(bwplotka): Add cases for append metadata to WAL and pass metadata sl.appendMetadataToWAL = false }) app := sl.appender() now := time.Now() + // Process expected samples. for i := range test.samples { + if !appV2 && test.samples[i].MF != "" { + // AppenderV1 does not support metric family passing. + test.samples[i].MF = "" + } + if test.samples[i].T != 0 { continue } @@ -2889,7 +3264,7 @@ metric: < _, _, _, err := app.append(buf.Bytes(), test.contentType, now) require.NoError(t, err) require.NoError(t, app.Commit()) - requireEqual(t, test.samples, appTest.ResultSamples()) + teststorage.RequireEqual(t, test.samples, appTest.ResultSamples()) }) } } @@ -2915,6 +3290,12 @@ func textToProto(text string, buf *bytes.Buffer) error { } func TestScrapeLoopAppendExemplarSeries(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendExemplarSeries(t, appV2) + }) +} + +func testScrapeLoopAppendExemplarSeries(t *testing.T, appV2 bool) { scrapeText := []string{`metric_total{n="1"} 1 # {t="1"} 1.0 10000 # EOF`, `metric_total{n="1"} 2 # {t="2"} 2.0 20000 # EOF`} @@ -2936,8 +3317,7 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { } appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, 
false, nil) } @@ -2962,15 +3342,20 @@ func TestScrapeLoopAppendExemplarSeries(t *testing.T) { require.NoError(t, app.Commit()) } - requireEqual(t, samples, appTest.ResultSamples()) + teststorage.RequireEqual(t, samples, appTest.ResultSamples()) } func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunReportsTargetDownOnScrapeError(t, appV2) + }) +} + +func testScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T, appV2 bool) { ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest }) scraper.scrapeFunc = func(context.Context, io.Writer) error { cancel() @@ -2982,11 +3367,16 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { } func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunReportsTargetDownOnInvalidUTF8(t, appV2) + }) +} + +func testScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T, appV2 bool) { ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = appTest }) scraper.scrapeFunc = func(_ context.Context, w io.Writer) error { cancel() @@ -2999,6 +3389,12 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { } func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t, appV2) + }) +} + +func testScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t 
*testing.T, appV2 bool) { appTest := teststorage.NewAppendable().WithErrs( func(ls labels.Labels) error { switch ls.Get(model.MetricNameLabel) { @@ -3012,7 +3408,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T return nil } }, nil, nil) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Unix(1, 0) app := sl.appender() @@ -3027,21 +3423,36 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T V: 1, }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) require.Equal(t, 4, total) require.Equal(t, 4, added) require.Equal(t, 1, seriesAdded) } func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { - sl, _ := newTestScrapeLoop(t, withAppendable( - appendableFunc(func(ctx context.Context) storage.Appender { - return &timeLimitAppender{ - Appender: teststorage.NewAppendable().Appender(ctx), - maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), - } - }), - )) + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopOutOfBoundsTimeError(t, appV2) + }) +} + +func testScrapeLoopOutOfBoundsTimeError(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + if appV2 { + sl.appendableV2 = appendableV2Func(func(ctx context.Context) storage.AppenderV2 { + return &timeLimitAppenderV2{ + AppenderV2: teststorage.NewAppendable().AppenderV2(ctx), + maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), + } + }) + } else { + sl.appendable = appendableFunc(func(ctx context.Context) storage.Appender { + return &timeLimitAppender{ + Appender: teststorage.NewAppendable().Appender(ctx), + maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), + } + }) + } + }) now := time.Now().Add(20 * time.Minute) app := sl.appender() 
@@ -3463,11 +3874,17 @@ func (ts *testScraper) readResponse(ctx context.Context, _ *http.Response, w io. } func TestScrapeLoop_RespectTimestamps(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRespectTimestamps(t, appV2) + }) +} + +func testScrapeLoopRespectTimestamps(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() @@ -3482,16 +3899,21 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { V: 1, }, } - require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } func TestScrapeLoop_DiscardTimestamps(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopDiscardTimestamps(t, appV2) + }) +} + +func testScrapeLoopDiscardTimestamps(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.honorTimestamps = false }) @@ -3508,15 +3930,21 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { V: 1, }, } - require.Equal(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopDiscardDuplicateLabels(t, appV2) + }) +} + +func testScrapeLoopDiscardDuplicateLabels(t *testing.T, appV2 bool) { s := 
teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) // We add a good and a bad metric to check that both are discarded. app := sl.appender() @@ -3548,12 +3976,17 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { } func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopDiscardUnnamedMetrics(t, appV2) + }) +} + +func testScrapeLoopDiscardUnnamedMetrics(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) appTest := teststorage.NewAppendable().Then(s) - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { if l.Has("drop") { return labels.FromStrings("no", "name") // This label set will trigger an error. 
@@ -3643,6 +4076,12 @@ func TestReusableConfig(t *testing.T) { } func TestReuseScrapeCache(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testReuseScrapeCache(t, appV2) + }) +} + +func testReuseScrapeCache(t *testing.T, appV2 bool) { var ( app = teststorage.NewAppendable() cfg = &config.ScrapeConfig{ @@ -3653,7 +4092,8 @@ func TestReuseScrapeCache(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, MetricNameEscapingScheme: model.AllowUTF8, } - sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa = selectAppendable(app, appV2) + sp, _ = newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"), scrapeConfig: &config.ScrapeConfig{ @@ -3827,10 +4267,16 @@ func TestReuseScrapeCache(t *testing.T) { } func TestScrapeAddFast(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeAddFast(t, appV2) + }) +} + +func testScrapeAddFast(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - sl, _ := newTestScrapeLoop(t, withAppendable(s)) + sl, _ := newTestScrapeLoop(t, withAppendable(s, appV2)) app := sl.appender() _, _, _, err := app.append([]byte("up 1\n"), "text/plain", time.Time{}) @@ -3850,6 +4296,12 @@ func TestScrapeAddFast(t *testing.T) { } func TestReuseCacheRace(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testReuseCacheRace(t, appV2) + }) +} + +func testReuseCacheRace(t *testing.T, appV2 bool) { var ( cfg = &config.ScrapeConfig{ JobName: "Prometheus", @@ -3860,7 +4312,8 @@ func TestReuseCacheRace(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } buffers = pool.New(1e3, 100e6, 3, func(sz int) any { return make([]byte, 0, sz) }) - sp, _ = newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) + sa = 
selectAppendable(teststorage.NewAppendable(), appV2) + sp, _ = newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) t1 = &Target{ labels: labels.FromStrings("labelNew", "nameNew"), scrapeConfig: &config.ScrapeConfig{}, @@ -3890,13 +4343,18 @@ func TestCheckAddError(t *testing.T) { var appErrs appendErrors sl, _ := newTestScrapeLoop(t) // TODO: Check err etc - _, _ = sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) + _, _ = sl.checkAddError(nil, nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) - // TODO(bwplotka): Test partial error check and other cases } func TestScrapeReportSingleAppender(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeReportSingleAppender(t, appV2) + }) +} + +func testScrapeReportSingleAppender(t *testing.T, appV2 bool) { t.Parallel() s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -3904,9 +4362,8 @@ func TestScrapeReportSingleAppender(t *testing.T) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, scraper := newTestScrapeLoop(t, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.ctx = ctx - sl.appendable = s // Since we're writing samples directly below we need to provide a protocol fallback. 
sl.fallbackScrapeProtocol = "text/plain" }) @@ -3953,6 +4410,12 @@ func TestScrapeReportSingleAppender(t *testing.T) { } func TestScrapeReportLimit(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeReportLimit(t, appV2) + }) +} + +func testScrapeReportLimit(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -3969,7 +4432,8 @@ func TestScrapeReportLimit(t *testing.T) { ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4009,6 +4473,12 @@ func TestScrapeReportLimit(t *testing.T) { } func TestScrapeUTF8(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeUTF8(t, appV2) + }) +} + +func testScrapeUTF8(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4023,7 +4493,8 @@ func TestScrapeUTF8(t *testing.T) { ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4053,7 +4524,13 @@ func TestScrapeUTF8(t *testing.T) { } func TestScrapeLoopLabelLimit(t *testing.T) { - tests := []struct { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopLabelLimit(t, appV2) + }) +} + +func testScrapeLoopLabelLimit(t *testing.T, appV2 bool) { + for _, test := range []struct { title string scrapeLabels string discoveryLabels []string @@ -4115,14 +4592,12 @@ func TestScrapeLoopLabelLimit(t *testing.T) { labelLimits: 
labelLimits{labelValueLengthLimit: 10}, expectErr: true, }, - } - - for _, test := range tests { + } { discoveryLabels := &Target{ labels: labels.FromStrings(test.discoveryLabels...), } - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -4146,6 +4621,12 @@ func TestScrapeLoopLabelLimit(t *testing.T) { } func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testTargetScrapeIntervalAndTimeoutRelabel(t, appV2) + }) +} + +func testTargetScrapeIntervalAndTimeoutRelabel(t *testing.T, appV2 bool) { interval, _ := model.ParseDuration("2s") timeout, _ := model.ParseDuration("500ms") cfg := &config.ScrapeConfig{ @@ -4172,7 +4653,9 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { }, }, } - sp, _ := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + + sa := selectAppendable(teststorage.NewAppendable(), appV2) + sp, _ := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) tgts := []*targetgroup.Group{ { Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}}, @@ -4188,6 +4671,12 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { // Testing whether we can remove trailing .0 from histogram 'le' and summary 'quantile' labels. 
func TestLeQuantileReLabel(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testLeQuantileReLabel(t, appV2) + }) +} + +func testLeQuantileReLabel(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4258,7 +4747,8 @@ test_summary_count 199 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4307,6 +4797,12 @@ test_summary_count 199 // Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. func TestConvertClassicHistogramsToNHCB(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testConvertClassicHistogramsToNHCB(t, appV2) + }) +} + +func testConvertClassicHistogramsToNHCB(t *testing.T, appV2 bool) { t.Parallel() genTestCounterText := func(name string) string { @@ -4711,8 +5207,7 @@ metric: < s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = s + sl, _ := newTestScrapeLoop(t, withAppendable(s, appV2), func(sl *scrapeLoop) { sl.alwaysScrapeClassicHist = tc.alwaysScrapeClassicHistograms sl.convertClassicHistToNHCB = tc.convertClassicHistToNHCB sl.enableNativeHistogramScraping = true @@ -4791,6 +5286,12 @@ metric: < } func TestTypeUnitReLabel(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testTypeUnitReLabel(t, appV2) + }) +} + +func testTypeUnitReLabel(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4839,7 +5340,8 @@ disk_usage_bytes 456 ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := 
selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -4877,13 +5379,18 @@ disk_usage_bytes 456 } func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t, appV2) + }) +} + +func testScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T, appV2 bool) { signal := make(chan struct{}, 1) ctx, cancel := context.WithCancel(t.Context()) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.ctx = ctx - sl.appendable = appTest // Since we're writing samples directly below we need to provide a protocol fallback. + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { + sl.ctx = ctx // Since we're writing samples directly below we need to provide a protocol fallback. 
sl.fallbackScrapeProtocol = "text/plain" sl.trackTimestampsStaleness = true }) @@ -4924,6 +5431,12 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * } func TestScrapeLoopCompression(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopCompression(t, appV2) + }) +} + +func testScrapeLoopCompression(t *testing.T, appV2 bool) { s := teststorage.New(t) t.Cleanup(func() { _ = s.Close() }) @@ -4963,7 +5476,8 @@ func TestScrapeLoopCompression(t *testing.T) { MetricNameEscapingScheme: model.AllowUTF8, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -5135,7 +5649,13 @@ func BenchmarkTargetScraperGzip(b *testing.B) { // When a scrape contains multiple instances for the same time series we should increment // prometheus_target_scrapes_sample_duplicate_timestamp_total metric. func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { - sl, _ := newTestScrapeLoop(t) + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopSeriesAddedDuplicates(t, appV2) + }) +} + +func testScrapeLoopSeriesAddedDuplicates(t *testing.T, appV2 bool) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2)) app := sl.appender() total, added, seriesAdded, err := app.append([]byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "text/plain", time.Time{}) @@ -5170,32 +5690,37 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { // This tests running a full scrape loop and checking that the scrape option // `native_histogram_min_bucket_factor` is used correctly. 
func TestNativeHistogramMaxSchemaSet(t *testing.T) { - testcases := map[string]struct { - minBucketFactor string - expectedSchema int32 - }{ - "min factor not specified": { - minBucketFactor: "", - expectedSchema: 3, // Factor 1.09. - }, - "min factor 1": { - minBucketFactor: "native_histogram_min_bucket_factor: 1", - expectedSchema: 3, // Factor 1.09. - }, - "min factor 2": { - minBucketFactor: "native_histogram_min_bucket_factor: 2", - expectedSchema: 0, // Factor 2.00. - }, - } - for name, tc := range testcases { - t.Run(name, func(t *testing.T) { - t.Parallel() - testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) - }) - } + foreachAppendable(t, func(t *testing.T, appV2 bool) { + for _, tc := range []struct { + name string + minBucketFactor string + expectedSchema int32 + }{ + { + name: "min factor not specified", + minBucketFactor: "", + expectedSchema: 3, // Factor 1.09. + }, + { + name: "min factor 1", + minBucketFactor: "native_histogram_min_bucket_factor: 1", + expectedSchema: 3, // Factor 1.09. + }, + { + name: "min factor 2", + minBucketFactor: "native_histogram_min_bucket_factor: 2", + expectedSchema: 0, // Factor 2.00. + }, + } { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema, appV2) + }) + } + }) } -func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expectedSchema int32) { +func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expectedSchema int32, appV2 bool) { // Create a ProtoBuf message to serve as a Prometheus metric. 
nativeHistogram := prometheus.NewHistogram( prometheus.HistogramOpts{ @@ -5248,6 +5773,12 @@ scrape_configs: mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond)}, nil, nil, s, reg) require.NoError(t, err) + + if appV2 { + mng.appendableV2 = s + mng.appendable = nil + } + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) require.NoError(t, mng.ApplyConfig(cfg)) @@ -5303,6 +5834,12 @@ scrape_configs: } func TestTargetScrapeConfigWithLabels(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testTargetScrapeConfigWithLabels(t, appV2) + }) +} + +func testTargetScrapeConfigWithLabels(t *testing.T, appV2 bool) { t.Parallel() const ( configTimeout = 1500 * time.Millisecond @@ -5348,7 +5885,8 @@ func TestTargetScrapeConfigWithLabels(t *testing.T) { } } - sp, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(teststorage.NewAppendable(), appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(sp.stop) @@ -5486,6 +6024,12 @@ func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice cha // Regression test for the panic fixed in https://github.com/prometheus/prometheus/pull/15523. 
func TestScrapePoolScrapeAfterReload(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapePoolScrapeAfterReload(t, appV2) + }) +} + +func testScrapePoolScrapeAfterReload(t *testing.T, appV2 bool) { h := httptest.NewServer(http.HandlerFunc( func(w http.ResponseWriter, _ *http.Request) { _, _ = w.Write([]byte{0x42, 0x42}) @@ -5511,7 +6055,8 @@ func TestScrapePoolScrapeAfterReload(t *testing.T) { }, } - p, err := newScrapePool(cfg, teststorage.NewAppendable(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + sa := selectAppendable(teststorage.NewAppendable(), appV2) + p, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) require.NoError(t, err) t.Cleanup(p.stop) @@ -5531,6 +6076,12 @@ func TestScrapePoolScrapeAfterReload(t *testing.T) { // The first scrape fails with a parsing error, but the second should // succeed and cause `metric_1=11` to appear in the appender. func TestScrapeAppendWithParseError(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeAppendWithParseError(t, appV2) + }) +} + +func testScrapeAppendWithParseError(t *testing.T, appV2 bool) { const ( scrape1 = `metric_a 1 ` @@ -5539,7 +6090,7 @@ func TestScrapeAppendWithParseError(t *testing.T) { ) appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, withAppendable(appTest)) + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2)) now := time.Now() app := sl.appender() @@ -5565,17 +6116,22 @@ func TestScrapeAppendWithParseError(t *testing.T) { V: 11, }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } // This test covers a case where there's a target with sample_limit set and some samples // changes between scrapes. 
func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendSampleLimitWithDisappearingSeries(t, appV2) + }) +} + +func testScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T, appV2 bool) { const sampleLimit = 4 appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.sampleLimit = sampleLimit }) @@ -5609,7 +6165,7 @@ func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { V: 1, }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) now = now.Add(time.Minute) app = sl.appender() @@ -5624,7 +6180,7 @@ func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { require.Equal(t, 6, samplesScraped) require.Equal(t, 6, samplesAfterRelabel) require.Equal(t, 1, createdSeries) // We've added one series before hitting the limit. - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) + testutil.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) sl.cache.iterDone(false) now = now.Add(time.Minute) @@ -5668,17 +6224,22 @@ func TestScrapeLoopAppendSampleLimitWithDisappearingSeries(t *testing.T) { V: math.Float64frombits(value.StaleNaN), }, }...) - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", appTest) } // This test covers a case where there's a target with sample_limit set and each scrape sees a completely // different set of samples. 
func TestScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopAppendSampleLimitReplaceAllSamples(t, appV2) + }) +} + +func testScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T, appV2 bool) { const sampleLimit = 4 appTest := teststorage.NewAppendable() - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { - sl.appendable = appTest + sl, _ := newTestScrapeLoop(t, withAppendable(appTest, appV2), func(sl *scrapeLoop) { sl.sampleLimit = sampleLimit }) @@ -5717,7 +6278,7 @@ func TestScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T) { V: 1, }, } - requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) now = now.Add(time.Minute) app = sl.appender() @@ -5778,14 +6339,20 @@ func TestScrapeLoopAppendSampleLimitReplaceAllSamples(t *testing.T) { V: math.Float64frombits(value.StaleNaN), }, }...) 
- requireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) + teststorage.RequireEqual(t, want, appTest.ResultSamples(), "Appended samples not as expected:\n%s", app) } func TestScrapeLoopDisableStalenessMarkerInjection(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testScrapeLoopDisableStalenessMarkerInjection(t, appV2) + }) +} + +func testScrapeLoopDisableStalenessMarkerInjection(t *testing.T, appV2 bool) { loopDone := atomic.NewBool(false) appTest := teststorage.NewAppendable() - sl, scraper := newTestScrapeLoop(t, withAppendable(appTest)) + sl, scraper := newTestScrapeLoop(t, withAppendable(appTest, appV2)) scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { if _, err := w.Write([]byte("metric_a 42\n")); err != nil { return err @@ -5834,6 +6401,7 @@ func BenchmarkScrapePoolRestartLoops(b *testing.B) { ScrapeTimeout: model.Duration(1 * time.Hour), }, nil, + nil, 0, nil, nil, @@ -5859,6 +6427,12 @@ func BenchmarkScrapePoolRestartLoops(b *testing.B) { // TestNewScrapeLoopHonorLabelsWiring verifies that newScrapeLoop correctly wires // HonorLabels (not HonorTimestamps) to the sampleMutator. func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testNewScrapeLoopHonorLabelsWiring(t, appV2) + }) +} + +func testNewScrapeLoopHonorLabelsWiring(t *testing.T, appV2 bool) { // Scraped metric has label "lbl" with value "scraped". // Discovery target has label "lbl" with value "discovery". // With honor_labels=true, the scraped value should win. 
@@ -5902,7 +6476,8 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { MetricNameValidationScheme: model.UTF8Validation, } - sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) + sa := selectAppendable(s, appV2) + sp, err := newScrapePool(cfg, sa.V1(), sa.V2(), 0, nil, nil, &Options{skipOffsetting: true}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() @@ -5936,6 +6511,12 @@ func TestNewScrapeLoopHonorLabelsWiring(t *testing.T) { } func TestDropsSeriesFromMetricRelabeling(t *testing.T) { + foreachAppendable(t, func(t *testing.T, appV2 bool) { + testDropsSeriesFromMetricRelabeling(t, appV2) + }) +} + +func testDropsSeriesFromMetricRelabeling(t *testing.T, appV2 bool) { target := &Target{} relabelConfig := []*relabel.Config{ { @@ -5951,7 +6532,7 @@ func TestDropsSeriesFromMetricRelabeling(t *testing.T) { NameValidationScheme: model.UTF8Validation, }, } - sl, _ := newTestScrapeLoop(t, func(sl *scrapeLoop) { + sl, _ := newTestScrapeLoop(t, withAppendable(teststorage.NewAppendable(), appV2), func(sl *scrapeLoop) { sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, target, true, relabelConfig) } diff --git a/scrape/target.go b/scrape/target.go index 4265f9e782..1040241bd3 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -454,6 +454,105 @@ func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels return ref, nil } +// limitAppender limits the number of total appended samples in a batch. +type limitAppenderV2 struct { + storage.AppenderV2 + + limit int + i int +} + +func (app *limitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (storage.SeriesRef, error) { + // Bypass sample_limit checks only if we have a staleness marker for a known series (ref value is non-zero). 
+ // This ensures that if a series is already in TSDB then we always write the marker. + if ref == 0 || !value.IsStaleNaN(v) { + app.i++ + if app.i > app.limit { + return 0, errSampleLimit + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +type timeLimitAppenderV2 struct { + storage.AppenderV2 + + maxTime int64 +} + +func (app *timeLimitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (storage.SeriesRef, error) { + if t > app.maxTime { + return 0, storage.ErrOutOfBounds + } + + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +// bucketLimitAppender limits the number of total appended samples in a batch. +type bucketLimitAppenderV2 struct { + storage.AppenderV2 + + limit int +} + +func (app *bucketLimitAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (_ storage.SeriesRef, err error) { + if h != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution. + if len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(h.Schema) { + return 0, errBucketLimit + } + for len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit { + if h.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + if err = h.ReduceResolution(h.Schema - 1); err != nil { + return 0, err + } + } + } + if fh != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution. 
+ if len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(fh.Schema) { + return 0, errBucketLimit + } + for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit { + if fh.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + if err = fh.ReduceResolution(fh.Schema - 1); err != nil { + return 0, err + } + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + +type maxSchemaAppenderV2 struct { + storage.AppenderV2 + + maxSchema int32 +} + +func (app *maxSchemaAppenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, opts storage.AOptions) (_ storage.SeriesRef, err error) { + if h != nil { + if histogram.IsExponentialSchemaReserved(h.Schema) && h.Schema > app.maxSchema { + if err = h.ReduceResolution(app.maxSchema); err != nil { + return 0, err + } + } + } + if fh != nil { + if histogram.IsExponentialSchemaReserved(fh.Schema) && fh.Schema > app.maxSchema { + if err = fh.ReduceResolution(app.maxSchema); err != nil { + return 0, err + } + } + } + return app.AppenderV2.Append(ref, ls, st, t, v, h, fh, opts) +} + // PopulateDiscoveredLabels sets base labels on lb from target and group labels and scrape configuration, before relabeling. 
func PopulateDiscoveredLabels(lb *labels.Builder, cfg *config.ScrapeConfig, tLabels, tgLabels model.LabelSet) { lb.Reset(labels.EmptyLabels()) diff --git a/scrape/target_test.go b/scrape/target_test.go index 06227da816..ea0aa2009f 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/teststorage" ) @@ -610,37 +611,65 @@ func TestBucketLimitAppender(t *testing.T) { }, } - appTest := teststorage.NewAppendable() - for _, c := range cases { for _, floatHisto := range []bool{true, false} { t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { - app := &bucketLimitAppender{Appender: appTest.Appender(t.Context()), limit: c.limit} - ts := int64(10 * time.Minute / time.Millisecond) - lbls := labels.FromStrings("__name__", "sparse_histogram_series") - var err error - if floatHisto { - fh := c.h.Copy().ToFloat(nil) - _, err = app.AppendHistogram(0, lbls, ts, nil, fh) - if c.expectError { - require.Error(t, err) + t.Run("appV2=false", func(t *testing.T) { + app := &bucketLimitAppender{Appender: teststorage.NewAppendable().Appender(t.Context()), limit: c.limit} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.AppendHistogram(0, lbls, ts, nil, fh) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, fh.Schema) + require.Equal(t, c.expectBucketCount, len(fh.NegativeBuckets)+len(fh.PositiveBuckets)) + require.NoError(t, err) + } } else { - require.Equal(t, c.expectSchema, fh.Schema) - require.Equal(t, c.expectBucketCount, len(fh.NegativeBuckets)+len(fh.PositiveBuckets)) - require.NoError(t, err) + h := 
c.h.Copy() + _, err = app.AppendHistogram(0, lbls, ts, h, nil) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, h.Schema) + require.Equal(t, c.expectBucketCount, len(h.NegativeBuckets)+len(h.PositiveBuckets)) + require.NoError(t, err) + } } - } else { - h := c.h.Copy() - _, err = app.AppendHistogram(0, lbls, ts, h, nil) - if c.expectError { - require.Error(t, err) + require.NoError(t, app.Commit()) + }) + t.Run("appV2=true", func(t *testing.T) { + app := &bucketLimitAppenderV2{AppenderV2: teststorage.NewAppendable().AppenderV2(t.Context()), limit: c.limit} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.Append(0, lbls, 0, ts, 0, nil, fh, storage.AOptions{}) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, fh.Schema) + require.Equal(t, c.expectBucketCount, len(fh.NegativeBuckets)+len(fh.PositiveBuckets)) + require.NoError(t, err) + } } else { - require.Equal(t, c.expectSchema, h.Schema) - require.Equal(t, c.expectBucketCount, len(h.NegativeBuckets)+len(h.PositiveBuckets)) - require.NoError(t, err) + h := c.h.Copy() + _, err = app.Append(0, lbls, 0, ts, 0, h, nil, storage.AOptions{}) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, h.Schema) + require.Equal(t, c.expectBucketCount, len(h.NegativeBuckets)+len(h.PositiveBuckets)) + require.NoError(t, err) + } } - } - require.NoError(t, app.Commit()) + require.NoError(t, app.Commit()) + }) }) } } @@ -696,27 +725,45 @@ func TestMaxSchemaAppender(t *testing.T) { }, } - appTest := teststorage.NewAppendable() - for _, c := range cases { for _, floatHisto := range []bool{true, false} { t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { - app := &maxSchemaAppender{Appender: appTest.Appender(t.Context()), maxSchema: c.maxSchema} - ts 
:= int64(10 * time.Minute / time.Millisecond) - lbls := labels.FromStrings("__name__", "sparse_histogram_series") - var err error - if floatHisto { - fh := c.h.Copy().ToFloat(nil) - _, err = app.AppendHistogram(0, lbls, ts, nil, fh) - require.Equal(t, c.expectSchema, fh.Schema) - require.NoError(t, err) - } else { - h := c.h.Copy() - _, err = app.AppendHistogram(0, lbls, ts, h, nil) - require.Equal(t, c.expectSchema, h.Schema) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) + t.Run("appV2=false", func(t *testing.T) { + app := &maxSchemaAppender{Appender: teststorage.NewAppendable().Appender(t.Context()), maxSchema: c.maxSchema} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.AppendHistogram(0, lbls, ts, nil, fh) + require.Equal(t, c.expectSchema, fh.Schema) + require.NoError(t, err) + } else { + h := c.h.Copy() + _, err = app.AppendHistogram(0, lbls, ts, h, nil) + require.Equal(t, c.expectSchema, h.Schema) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + }) + t.Run("appV2=true", func(t *testing.T) { + app := &maxSchemaAppenderV2{AppenderV2: teststorage.NewAppendable().AppenderV2(t.Context()), maxSchema: c.maxSchema} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.Append(0, lbls, 0, ts, 0, nil, fh, storage.AOptions{}) + require.Equal(t, c.expectSchema, fh.Schema) + require.NoError(t, err) + } else { + h := c.h.Copy() + _, err = app.Append(0, lbls, 0, ts, 0, h, nil, storage.AOptions{}) + require.Equal(t, c.expectSchema, h.Schema) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + }) }) } } @@ -724,32 +771,65 @@ func TestMaxSchemaAppender(t *testing.T) { // Test sample_limit when a scrape contains Native Histograms. 
func TestAppendWithSampleLimitAndNativeHistogram(t *testing.T) { - appTest := teststorage.NewAppendable() - now := time.Now() - app := appenderWithLimits(appTest.Appender(t.Context()), 2, 0, histogram.ExponentialSchemaMax) + t.Run("appV2=false", func(t *testing.T) { + app := appenderWithLimits(teststorage.NewAppendable().Appender(t.Context()), 2, 0, histogram.ExponentialSchemaMax) - // sample_limit is set to 2, so first two scrapes should work - _, err := app.Append(0, labels.FromStrings(model.MetricNameLabel, "foo"), timestamp.FromTime(now), 1) - require.NoError(t, err) + // sample_limit is set to 2, so first two scrapes should work + _, err := app.Append(0, labels.FromStrings(model.MetricNameLabel, "foo"), timestamp.FromTime(now), 1) + require.NoError(t, err) - // Second sample, should be ok. - _, err = app.AppendHistogram( - 0, - labels.FromStrings(model.MetricNameLabel, "my_histogram1"), - timestamp.FromTime(now), - &histogram.Histogram{}, - nil, - ) - require.NoError(t, err) + // Second sample, should be ok. + _, err = app.AppendHistogram( + 0, + labels.FromStrings(model.MetricNameLabel, "my_histogram1"), + timestamp.FromTime(now), + &histogram.Histogram{}, + nil, + ) + require.NoError(t, err) - // This is third sample with sample_limit=2, it should trigger errSampleLimit. - _, err = app.AppendHistogram( - 0, - labels.FromStrings(model.MetricNameLabel, "my_histogram2"), - timestamp.FromTime(now), - &histogram.Histogram{}, - nil, - ) - require.ErrorIs(t, err, errSampleLimit) + // This is third sample with sample_limit=2, it should trigger errSampleLimit. 
+ _, err = app.AppendHistogram( + 0, + labels.FromStrings(model.MetricNameLabel, "my_histogram2"), + timestamp.FromTime(now), + &histogram.Histogram{}, + nil, + ) + require.ErrorIs(t, err, errSampleLimit) + }) + t.Run("appV2=true", func(t *testing.T) { + app := appenderV2WithLimits(teststorage.NewAppendable().AppenderV2(t.Context()), 2, 0, histogram.ExponentialSchemaMax) + + // sample_limit is set to 2, so first two scrapes should work + _, err := app.Append(0, labels.FromStrings(model.MetricNameLabel, "foo"), 0, timestamp.FromTime(now), 1, nil, nil, storage.AOptions{}) + require.NoError(t, err) + + // Second sample, should be ok. + _, err = app.Append( + 0, + labels.FromStrings(model.MetricNameLabel, "my_histogram1"), + 0, + timestamp.FromTime(now), + 0, + &histogram.Histogram{}, + nil, + storage.AOptions{}, + ) + require.NoError(t, err) + + // This is third sample with sample_limit=2, it should trigger errSampleLimit. + _, err = app.Append( + 0, + labels.FromStrings(model.MetricNameLabel, "my_histogram2"), + 0, + timestamp.FromTime(now), + 0, + &histogram.Histogram{}, + nil, + storage.AOptions{}, + ) + require.ErrorIs(t, err, errSampleLimit) + }) } diff --git a/util/teststorage/appender.go b/util/teststorage/appender.go index 55cb727ee0..d88d905694 100644 --- a/util/teststorage/appender.go +++ b/util/teststorage/appender.go @@ -21,15 +21,21 @@ import ( "slices" "strings" "sync" + "testing" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" "go.uber.org/atomic" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/testutil" ) // Sample represents test, combined sample for mocking 
storage.AppenderV2. @@ -91,6 +97,56 @@ func (s Sample) Equals(other Sample) bool { slices.EqualFunc(s.ES, other.ES, exemplar.Exemplar.Equals) } +var ( + sampleComparer = cmp.Comparer(func(a, b Sample) bool { + return a.Equals(b) + }) + byLabelSort = cmpopts.SortSlices(func(a, b Sample) int { + return labels.Compare(a.L, b.L) + }) +) + +func includeStaleNaNs(s []Sample) bool { + for _, e := range s { + if value.IsStaleNaN(e.V) { + return true + } + } + return false +} + +// RequireEqual is a special require equal that correctly compares Prometheus structures. + +// In comparison to testutil.RequireEqual, this function adds special logic for comparing []Samples. +// +// It also ignores ordering when expected slice contains at least one StaleNaN. This is because the +// scrape StaleNaN samples are generated by iterating over a map, thus expectedly different. +// +// TODO(bwplotka): We should likely reorder only within a group of sequential NaNs or only in scrape package. +func RequireEqual(t testing.TB, expected, got []Sample, msgAndArgs ...any) { + opts := []cmp.Option{sampleComparer} + if includeStaleNaNs(expected) { + opts = append(opts, byLabelSort) + } + testutil.RequireEqualWithOptions(t, expected, got, opts, msgAndArgs...) +} + +// RequireNotEqual is the negation of RequireEqual. +func RequireNotEqual(t testing.TB, expected, got []Sample, msgAndArgs ...any) { + t.Helper() + + opts := []cmp.Option{cmp.Comparer(labels.Equal), sampleComparer} + if includeStaleNaNs(expected) { + opts = append(opts, byLabelSort) + } + if !cmp.Equal(expected, got, opts...) { + return + } + require.Fail(t, fmt.Sprintf("Equal, but expected not: \n"+ + "a: %s\n"+ + "b: %s", expected, got), msgAndArgs...) +} + // Appendable is a storage.Appendable mock. // It allows recording all samples that were added through the appender and injecting errors. // Appendable will panic if more than one Appender is open.
@@ -108,8 +164,7 @@ type Appendable struct { rolledbackSamples []Sample // Optional chain (Appender will collect samples, then run next). - next storage.Appendable - nextV2 storage.AppendableV2 + next compatAppendable } // NewAppendable returns mock Appendable. @@ -117,15 +172,14 @@ func NewAppendable() *Appendable { return &Appendable{} } -// Then chains another appender from the provided Appendable for the Appender calls. -func (a *Appendable) Then(appendable storage.Appendable) *Appendable { - a.next = appendable - return a +type compatAppendable interface { + storage.Appendable + storage.AppendableV2 } -// ThenV2 chains another appenderV2 from the provided AppendableV2 for the AppenderV2 calls. -func (a *Appendable) ThenV2(appendable storage.AppendableV2) *Appendable { - a.nextV2 = appendable +// Then chains another appender from the provided Appendable for the Appender calls. +func (a *Appendable) Then(appendable compatAppendable) *Appendable { + a.next = appendable return a } @@ -289,13 +343,12 @@ func (a *Appendable) Appender(ctx context.Context) storage.Appender { ret := &appender{baseAppender: baseAppender{a: a}} if a.openAppenders.Inc() > 1 { ret.err = errors.New("teststorage.Appendable.Appender() concurrent use is not supported; attempted opening new Appender() without Commit/Rollback of the previous one. Extend the implementation if concurrent mock is needed") + return ret } if a.next != nil { app := a.next.Appender(ctx) ret.next, ret.nextTr = app, app - } else if a.nextV2 != nil { - ret.err = errors.Join(ret.err, errors.New("teststorage.Appendable.Appender() invoked with .ThenV2 but no .Then was supplied; likely bug")) } return ret } @@ -332,7 +385,8 @@ func computeOrCheckRef(ref storage.SeriesRef, ls labels.Labels) (storage.SeriesR } if storage.SeriesRef(h) != ref { - // Check for buggy ref while we at it. + // Check for buggy ref while we are at it. 
This only makes sense for cases without .Then*, because further appendable + // might have a different ref computation logic e.g. TSDB uses atomic increments. return 0, errors.New("teststorage.appender: found input ref not matching labels; potential bug in Appendable usage") } return ref, nil @@ -442,13 +496,12 @@ func (a *Appendable) AppenderV2(ctx context.Context) storage.AppenderV2 { ret := &appenderV2{baseAppender: baseAppender{a: a}} if a.openAppenders.Inc() > 1 { ret.err = errors.New("teststorage.Appendable.AppenderV2() concurrent use is not supported; attempted opening new AppenderV2() without Commit/Rollback of the previous one. Extend the implementation if concurrent mock is needed") + return ret } - if a.nextV2 != nil { - app := a.nextV2.AppenderV2(ctx) + if a.next != nil { + app := a.next.AppenderV2(ctx) ret.next, ret.nextTr = app, app - } else if a.next != nil { - ret.err = errors.Join(ret.err, errors.New("teststorage.Appendable.AppenderV2() invoked with .Then but no .ThenV2 was supplied; likely bug")) } return ret } @@ -498,13 +551,14 @@ func (a *appenderV2) Append(ref storage.SeriesRef, ls labels.Labels, st, t int64 if a.next != nil { ref, err = a.next.Append(ref, ls, st, t, v, h, fh, opts) + if err != nil { + return 0, err + } + } else { + ref, err = computeOrCheckRef(ref, ls) if err != nil { return ref, err } } - ref, err = computeOrCheckRef(ref, ls) - if err != nil { - return ref, err - } return ref, partialErr } diff --git a/util/teststorage/appender_test.go b/util/teststorage/appender_test.go index 5b0e03483b..bbd6b54125 100644 --- a/util/teststorage/appender_test.go +++ b/util/teststorage/appender_test.go @@ -15,16 +15,16 @@ package teststorage import ( "errors" - "fmt" + "math" "testing" - "github.com/google/go-cmp/cmp" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" + 
"github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/testutil" @@ -140,24 +140,11 @@ func TestAppendable_Then(t *testing.T) { // Ensure next mock record all the appends when appending to app. testAppendableV1(t, nextAppTest, app) - - // V2 should fail as Then was supplied with Appendable V1. - require.Error(t, app.AppenderV2(t.Context()).Commit()) -} - -func TestAppendable_ThenV2(t *testing.T) { - nextAppTest := NewAppendable() - app := NewAppendable().ThenV2(nextAppTest) - // Ensure next mock record all the appends when appending to app. testAppendableV2(t, nextAppTest, app) - - // V1 should fail as ThenV2 was supplied with Appendable V2. - require.Error(t, app.Appender(t.Context()).Commit()) } -// TestSample_RequireEqual ensures standard testutil.RequireEqual is enough for comparisons. -// This is thanks to the fact metadata has now Equals method. +// TestSample_RequireEqual. func TestSample_RequireEqual(t *testing.T) { a := []Sample{ {}, @@ -165,7 +152,7 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 123.123}, {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - testutil.RequireEqual(t, a, a) + RequireEqual(t, a, a) b1 := []Sample{ {}, @@ -173,7 +160,7 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2_diff", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 123.123}, // test_metric2_diff is different. 
{ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - requireNotEqual(t, a, b1) + RequireNotEqual(t, a, b1) b2 := []Sample{ {}, @@ -181,7 +168,7 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 123.123}, {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo2")}}}, // exemplar is different. } - requireNotEqual(t, a, b2) + RequireNotEqual(t, a, b2) b3 := []Sample{ {}, @@ -189,7 +176,7 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 123.123, T: 123}, // Timestamp is different. {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - requireNotEqual(t, a, b3) + RequireNotEqual(t, a, b3) b4 := []Sample{ {}, @@ -197,7 +184,7 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 456.456}, // Value is different. {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - requireNotEqual(t, a, b4) + RequireNotEqual(t, a, b4) b5 := []Sample{ {}, @@ -205,19 +192,43 @@ func TestSample_RequireEqual(t *testing.T) { {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: 123.123}, {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - requireNotEqual(t, a, b5) -} + RequireNotEqual(t, a, b5) -// TODO(bwplotka): While this mimick testutil.RequireEqual just making it negative, this does not literally test -// testutil.RequireEqual. 
Either build test suita that mocks `testing.TB` or get rid of testutil.RequireEqual somehow. -func requireNotEqual(t testing.TB, a, b any) { - t.Helper() - if !cmp.Equal(a, b, cmp.Comparer(labels.Equal)) { - return + // NaN comparison. + a = []Sample{ + {}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, } - require.Fail(t, fmt.Sprintf("Equal, but expected not: \n"+ - "a: %s\n"+ - "b: %s", a, b)) + RequireEqual(t, a, a) + + // NaN comparison with different order. + a = []Sample{ + {}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric10", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, + } + b6 := []Sample{ + {}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric10", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", 
Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, + } + RequireEqual(t, a, b6) + + // Not equal with NaNs. + b7 := []Sample{ + {}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric_total"), M: metadata.Metadata{Type: "counter", Unit: "metric", Help: "some help text"}}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric10", "foo", "bar"), M: metadata.Metadata{Type: "gauge", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, + {L: labels.FromStrings(model.MetricNameLabel, "test_metric2", "foo", "bar"), M: metadata.Metadata{Type: "gauge2", Unit: "", Help: "other help text"}, V: math.Float64frombits(value.StaleNaN)}, // metadata different + {ES: []exemplar.Exemplar{{Labels: labels.FromStrings(model.MetricNameLabel, "yolo")}}}, + } + RequireNotEqual(t, a, b7) } func TestConcurrentAppender_ReturnsErrAppender(t *testing.T) { diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 17efdda77d..a8e1306955 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -28,34 +28,34 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) +type Option func(opt *tsdb.Options) + // New returns a new TestStorage for testing purposes // that removes all associated files on closing. -func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { - stor, err := NewWithError(outOfOrderTimeWindow...) +func New(t testutil.T, o ...Option) *TestStorage { + s, err := NewWithError(o...) require.NoError(t, err) - return stor + return s } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. 
-func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { - dir, err := os.MkdirTemp("", "test_storage") - if err != nil { - return nil, fmt.Errorf("opening test directory: %w", err) - } - +func NewWithError(o ...Option) (*TestStorage, error) { // Tests just load data for a series sequentially. Thus we // need a long appendable window. opts := tsdb.DefaultOptions() opts.MinBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 + opts.OutOfOrderTimeWindow = 0 - // Set OutOfOrderTimeWindow if provided, otherwise use default (0) - if len(outOfOrderTimeWindow) > 0 { - opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] - } else { - opts.OutOfOrderTimeWindow = 0 // Default value is zero + for _, opt := range o { + opt(opts) + } + + dir, err := os.MkdirTemp("", "test_storage") + if err != nil { + return nil, fmt.Errorf("opening test directory: %w", err) } db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats())