mirror of
https://github.com/prometheus/prometheus.git
synced 2025-08-05 21:57:09 +02:00
Rather than keeping the entire symbol table in memory, keep every nth offset and walk from there to the entry we need. This ends up slightly slower, ~360ms per 1M series returned from PostingsForMatchers which is not much considering the rest of the CPU such a query would go on to use. Make LabelValues use the postings tables, rather than having to do symbol lookups. Use yoloString, as PostingsForMatchers doesn't need the strings to stick around and adjust the API call to keep the Querier open until it's all marshalled. Remove allocatedSymbols memory optimisation, we no longer keep all the symbol strings in heap memory. Remove LabelValuesFor and LabelIndices, they're dead code. Ensure we've still tests for label indices, and add missing test that we can work with old V1 Format index files. PostingForMatchers performance is slightly better, with a big drop in allocation counts due to using yoloString for LabelValues: benchmark old ns/op new ns/op delta BenchmarkPostingsForMatchers/Block/n="1"-4 36698 36681 -0.05% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 522786 560887 +7.29% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 511652 537680 +5.09% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 522102 564239 +8.07% BenchmarkPostingsForMatchers/Block/i=~".*"-4 113689911 111795919 -1.67% BenchmarkPostingsForMatchers/Block/i=~".+"-4 135825572 132871085 -2.18% BenchmarkPostingsForMatchers/Block/i=~""-4 40782628 38038181 -6.73% BenchmarkPostingsForMatchers/Block/i!=""-4 31267869 29194327 -6.63% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 112733329 111568823 -1.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 112868153 111232029 -1.45% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 31338257 29349446 -6.35% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 32054482 29972436 -6.50% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 136504654 133968442 -1.86% 
BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 27960350 27264997 -2.49% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 136765564 133860724 -2.12% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 163714583 159453668 -2.60% benchmark old allocs new allocs delta BenchmarkPostingsForMatchers/Block/n="1"-4 6 6 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 11 11 +0.00% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 11 11 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 17 15 -11.76% BenchmarkPostingsForMatchers/Block/i=~".*"-4 100012 12 -99.99% BenchmarkPostingsForMatchers/Block/i=~".+"-4 200040 100040 -49.99% BenchmarkPostingsForMatchers/Block/i=~""-4 200045 100045 -49.99% BenchmarkPostingsForMatchers/Block/i!=""-4 200041 100041 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 100017 17 -99.98% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 100023 23 -99.98% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 200046 100046 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 200050 100050 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 200049 100049 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 111150 11150 -89.97% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 200055 100055 -49.99% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 311238 111238 -64.26% benchmark old bytes new bytes delta BenchmarkPostingsForMatchers/Block/n="1"-4 296 296 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j="foo"-4 424 424 +0.00% BenchmarkPostingsForMatchers/Block/j="foo",n="1"-4 424 424 +0.00% BenchmarkPostingsForMatchers/Block/n="1",j!="foo"-4 552 1544 +179.71% BenchmarkPostingsForMatchers/Block/i=~".*"-4 1600482 1606125 +0.35% BenchmarkPostingsForMatchers/Block/i=~".+"-4 17259065 17264709 +0.03% BenchmarkPostingsForMatchers/Block/i=~""-4 17259150 17264780 +0.03% 
BenchmarkPostingsForMatchers/Block/i!=""-4 17259048 17264680 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",j="foo"-4 1600610 1606242 +0.35% BenchmarkPostingsForMatchers/Block/n="1",i=~".*",i!="2",j="foo"-4 1600813 1606434 +0.35% BenchmarkPostingsForMatchers/Block/n="1",i!=""-4 17259176 17264808 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i!="",j="foo"-4 17259304 17264936 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",j="foo"-4 17259333 17264965 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~"1.+",j="foo"-4 3142628 3148262 +0.18% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!="2",j="foo"-4 17259509 17265141 +0.03% BenchmarkPostingsForMatchers/Block/n="1",i=~".+",i!~"2.*",j="foo"-4 20405680 20416944 +0.06% However overall Select performance is down and involves more allocs, due to having to do more than a simple map lookup to resolve a symbol and that all the strings returned are allocated: benchmark old ns/op new ns/op delta BenchmarkQuerierSelect/Block/1of1000000-4 506092636 862678244 +70.46% BenchmarkQuerierSelect/Block/10of1000000-4 505638968 860917636 +70.26% BenchmarkQuerierSelect/Block/100of1000000-4 505229450 882150048 +74.60% BenchmarkQuerierSelect/Block/1000of1000000-4 515905414 862241115 +67.13% BenchmarkQuerierSelect/Block/10000of1000000-4 516785354 874841110 +69.29% BenchmarkQuerierSelect/Block/100000of1000000-4 540742808 907030187 +67.74% BenchmarkQuerierSelect/Block/1000000of1000000-4 815224288 1181236903 +44.90% benchmark old allocs new allocs delta BenchmarkQuerierSelect/Block/1of1000000-4 4000020 6000020 +50.00% BenchmarkQuerierSelect/Block/10of1000000-4 4000038 6000038 +50.00% BenchmarkQuerierSelect/Block/100of1000000-4 4000218 6000218 +50.00% BenchmarkQuerierSelect/Block/1000of1000000-4 4002018 6002018 +49.97% BenchmarkQuerierSelect/Block/10000of1000000-4 4020018 6020018 +49.75% BenchmarkQuerierSelect/Block/100000of1000000-4 4200018 6200018 +47.62% BenchmarkQuerierSelect/Block/1000000of1000000-4 6000018 8000019 
+33.33% benchmark old bytes new bytes delta BenchmarkQuerierSelect/Block/1of1000000-4 176001468 227201476 +29.09% BenchmarkQuerierSelect/Block/10of1000000-4 176002620 227202628 +29.09% BenchmarkQuerierSelect/Block/100of1000000-4 176014140 227214148 +29.09% BenchmarkQuerierSelect/Block/1000of1000000-4 176129340 227329348 +29.07% BenchmarkQuerierSelect/Block/10000of1000000-4 177281340 228481348 +28.88% BenchmarkQuerierSelect/Block/100000of1000000-4 188801340 240001348 +27.12% BenchmarkQuerierSelect/Block/1000000of1000000-4 304001340 355201616 +16.84% Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
646 lines
18 KiB
Go
646 lines
18 KiB
Go
// Copyright 2017 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package tsdb
|
|
|
|
import (
|
|
"encoding/json"
|
|
"io"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"github.com/go-kit/kit/log"
|
|
"github.com/go-kit/kit/log/level"
|
|
"github.com/oklog/ulid"
|
|
"github.com/pkg/errors"
|
|
"github.com/prometheus/prometheus/pkg/labels"
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
"github.com/prometheus/prometheus/tsdb/chunks"
|
|
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
|
|
"github.com/prometheus/prometheus/tsdb/fileutil"
|
|
"github.com/prometheus/prometheus/tsdb/index"
|
|
"github.com/prometheus/prometheus/tsdb/tombstones"
|
|
)
|
|
|
|
// IndexWriter serializes the index for a block of series data.
|
|
// The methods must be called in the order they are specified in.
|
|
type IndexWriter interface {
|
|
// AddSymbols registers all string symbols that are encountered in series
|
|
// and other indices.
|
|
AddSymbols(sym map[string]struct{}) error
|
|
|
|
// AddSeries populates the index writer with a series and its offsets
|
|
// of chunks that the index can reference.
|
|
// Implementations may require series to be insert in increasing order by
|
|
// their labels.
|
|
// The reference numbers are used to resolve entries in postings lists that
|
|
// are added later.
|
|
AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta) error
|
|
|
|
// WriteLabelIndex serializes an index from label names to values.
|
|
// The passed in values chained tuples of strings of the length of names.
|
|
WriteLabelIndex(names []string, values []string) error
|
|
|
|
// Close writes any finalization and closes the resources associated with
|
|
// the underlying writer.
|
|
Close() error
|
|
}
|
|
|
|
// IndexReader provides reading access of serialized index data.
|
|
type IndexReader interface {
|
|
// Symbols returns a set of string symbols that may occur in series' labels
|
|
// and indices.
|
|
Symbols() (map[string]struct{}, error)
|
|
|
|
// LabelValues returns sorted possible label values.
|
|
LabelValues(names ...string) (index.StringTuples, error)
|
|
|
|
// Postings returns the postings list iterator for the label pairs.
|
|
// The Postings here contain the offsets to the series inside the index.
|
|
// Found IDs are not strictly required to point to a valid Series, e.g.
|
|
// during background garbage collections. Input values must be sorted.
|
|
Postings(name string, values ...string) (index.Postings, error)
|
|
|
|
// SortedPostings returns a postings list that is reordered to be sorted
|
|
// by the label set of the underlying series.
|
|
SortedPostings(index.Postings) index.Postings
|
|
|
|
// Series populates the given labels and chunk metas for the series identified
|
|
// by the reference.
|
|
// Returns ErrNotFound if the ref does not resolve to a known series.
|
|
Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error
|
|
|
|
// LabelNames returns all the unique label names present in the index in sorted order.
|
|
LabelNames() ([]string, error)
|
|
|
|
// Close releases the underlying resources of the reader.
|
|
Close() error
|
|
}
|
|
|
|
// StringTuples gives indexed access to a sorted list of string tuples.
type StringTuples interface {
	// Len reports the total number of tuples in the list.
	Len() int
	// At returns the tuple stored at position i.
	At(i int) ([]string, error)
}
|
|
|
|
// ChunkWriter serializes a time block of chunked series data.
|
|
type ChunkWriter interface {
|
|
// WriteChunks writes several chunks. The Chunk field of the ChunkMetas
|
|
// must be populated.
|
|
// After returning successfully, the Ref fields in the ChunkMetas
|
|
// are set and can be used to retrieve the chunks from the written data.
|
|
WriteChunks(chunks ...chunks.Meta) error
|
|
|
|
// Close writes any required finalization and closes the resources
|
|
// associated with the underlying writer.
|
|
Close() error
|
|
}
|
|
|
|
// ChunkReader provides reading access of serialized time series data.
|
|
type ChunkReader interface {
|
|
// Chunk returns the series data chunk with the given reference.
|
|
Chunk(ref uint64) (chunkenc.Chunk, error)
|
|
|
|
// Close releases all underlying resources of the reader.
|
|
Close() error
|
|
}
|
|
|
|
// BlockReader provides reading access to a data block.
|
|
type BlockReader interface {
|
|
// Index returns an IndexReader over the block's data.
|
|
Index() (IndexReader, error)
|
|
|
|
// Chunks returns a ChunkReader over the block's data.
|
|
Chunks() (ChunkReader, error)
|
|
|
|
// Tombstones returns a tombstones.Reader over the block's deleted data.
|
|
Tombstones() (tombstones.Reader, error)
|
|
|
|
// Meta provides meta information about the block reader.
|
|
Meta() BlockMeta
|
|
}
|
|
|
|
// Appendable defines an entity to which data can be appended.
|
|
type Appendable interface {
|
|
// Appender returns a new Appender against an underlying store.
|
|
Appender() Appender
|
|
}
|
|
|
|
// BlockMeta provides meta information about a block.
|
|
type BlockMeta struct {
|
|
// Unique identifier for the block and its contents. Changes on compaction.
|
|
ULID ulid.ULID `json:"ulid"`
|
|
|
|
// MinTime and MaxTime specify the time range all samples
|
|
// in the block are in.
|
|
MinTime int64 `json:"minTime"`
|
|
MaxTime int64 `json:"maxTime"`
|
|
|
|
// Stats about the contents of the block.
|
|
Stats BlockStats `json:"stats,omitempty"`
|
|
|
|
// Information on compactions the block was created from.
|
|
Compaction BlockMetaCompaction `json:"compaction"`
|
|
|
|
// Version of the index format.
|
|
Version int `json:"version"`
|
|
}
|
|
|
|
// BlockStats holds statistics about the contents of a block.
// Every counter is omitted from the JSON encoding when it is zero.
type BlockStats struct {
	NumSamples    uint64 `json:"numSamples,omitempty"`
	NumSeries     uint64 `json:"numSeries,omitempty"`
	NumChunks     uint64 `json:"numChunks,omitempty"`
	NumTombstones uint64 `json:"numTombstones,omitempty"`
}
|
|
|
|
// BlockDesc describes a block by ULID and time range.
|
|
type BlockDesc struct {
|
|
ULID ulid.ULID `json:"ulid"`
|
|
MinTime int64 `json:"minTime"`
|
|
MaxTime int64 `json:"maxTime"`
|
|
}
|
|
|
|
// BlockMetaCompaction holds information about compactions a block went through.
|
|
type BlockMetaCompaction struct {
|
|
// Maximum number of compaction cycles any source block has
|
|
// gone through.
|
|
Level int `json:"level"`
|
|
// ULIDs of all source head blocks that went into the block.
|
|
Sources []ulid.ULID `json:"sources,omitempty"`
|
|
// Indicates that during compaction it resulted in a block without any samples
|
|
// so it should be deleted on the next reload.
|
|
Deletable bool `json:"deletable,omitempty"`
|
|
// Short descriptions of the direct blocks that were used to create
|
|
// this block.
|
|
Parents []BlockDesc `json:"parents,omitempty"`
|
|
Failed bool `json:"failed,omitempty"`
|
|
}
|
|
|
|
// File names used inside a block directory, and the only meta file version
// that readMetaFile accepts and writeMetaFile produces.
const (
	indexFilename = "index"
	metaFilename  = "meta.json"
	metaVersion1  = 1
)
|
|
|
|
// chunkDir returns the path of the "chunks" subdirectory below dir.
func chunkDir(dir string) string {
	const chunksSubdir = "chunks"
	return filepath.Join(dir, chunksSubdir)
}
|
|
|
|
func readMetaFile(dir string) (*BlockMeta, int64, error) {
|
|
b, err := ioutil.ReadFile(filepath.Join(dir, metaFilename))
|
|
if err != nil {
|
|
return nil, 0, err
|
|
}
|
|
var m BlockMeta
|
|
|
|
if err := json.Unmarshal(b, &m); err != nil {
|
|
return nil, 0, err
|
|
}
|
|
if m.Version != metaVersion1 {
|
|
return nil, 0, errors.Errorf("unexpected meta file version %d", m.Version)
|
|
}
|
|
|
|
return &m, int64(len(b)), nil
|
|
}
|
|
|
|
func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error) {
|
|
meta.Version = metaVersion1
|
|
|
|
// Make any changes to the file appear atomic.
|
|
path := filepath.Join(dir, metaFilename)
|
|
tmp := path + ".tmp"
|
|
defer func() {
|
|
if err := os.RemoveAll(tmp); err != nil {
|
|
level.Error(logger).Log("msg", "remove tmp file", "err", err.Error())
|
|
}
|
|
}()
|
|
|
|
f, err := os.Create(tmp)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
jsonMeta, err := json.MarshalIndent(meta, "", "\t")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
var merr tsdb_errors.MultiError
|
|
n, err := f.Write(jsonMeta)
|
|
if err != nil {
|
|
merr.Add(err)
|
|
merr.Add(f.Close())
|
|
return 0, merr.Err()
|
|
}
|
|
|
|
// Force the kernel to persist the file on disk to avoid data loss if the host crashes.
|
|
if err := f.Sync(); err != nil {
|
|
merr.Add(err)
|
|
merr.Add(f.Close())
|
|
return 0, merr.Err()
|
|
}
|
|
if err := f.Close(); err != nil {
|
|
return 0, err
|
|
}
|
|
return int64(n), fileutil.Replace(tmp, path)
|
|
}
|
|
|
|
// Block represents a directory of time series data covering a continuous time range.
|
|
type Block struct {
|
|
mtx sync.RWMutex
|
|
closing bool
|
|
pendingReaders sync.WaitGroup
|
|
|
|
dir string
|
|
meta BlockMeta
|
|
|
|
// Symbol Table Size in bytes.
|
|
// We maintain this variable to avoid recalculation every time.
|
|
symbolTableSize uint64
|
|
|
|
chunkr ChunkReader
|
|
indexr IndexReader
|
|
tombstones tombstones.Reader
|
|
|
|
logger log.Logger
|
|
|
|
numBytesChunks int64
|
|
numBytesIndex int64
|
|
numBytesTombstone int64
|
|
numBytesMeta int64
|
|
}
|
|
|
|
// OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used
|
|
// to instantiate chunk structs.
|
|
func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) {
|
|
if logger == nil {
|
|
logger = log.NewNopLogger()
|
|
}
|
|
var closers []io.Closer
|
|
defer func() {
|
|
if err != nil {
|
|
var merr tsdb_errors.MultiError
|
|
merr.Add(err)
|
|
merr.Add(closeAll(closers))
|
|
err = merr.Err()
|
|
}
|
|
}()
|
|
meta, sizeMeta, err := readMetaFile(dir)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cr, err := chunks.NewDirReader(chunkDir(dir), pool)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, cr)
|
|
|
|
ir, err := index.NewFileReader(filepath.Join(dir, indexFilename))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, ir)
|
|
|
|
tr, sizeTomb, err := tombstones.ReadTombstones(dir)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
closers = append(closers, tr)
|
|
|
|
pb = &Block{
|
|
dir: dir,
|
|
meta: *meta,
|
|
chunkr: cr,
|
|
indexr: ir,
|
|
tombstones: tr,
|
|
symbolTableSize: ir.SymbolTableSize(),
|
|
logger: logger,
|
|
numBytesChunks: cr.Size(),
|
|
numBytesIndex: ir.Size(),
|
|
numBytesTombstone: sizeTomb,
|
|
numBytesMeta: sizeMeta,
|
|
}
|
|
return pb, nil
|
|
}
|
|
|
|
// Close closes the on-disk block. It blocks as long as there are readers reading from the block.
|
|
func (pb *Block) Close() error {
|
|
pb.mtx.Lock()
|
|
pb.closing = true
|
|
pb.mtx.Unlock()
|
|
|
|
pb.pendingReaders.Wait()
|
|
|
|
var merr tsdb_errors.MultiError
|
|
|
|
merr.Add(pb.chunkr.Close())
|
|
merr.Add(pb.indexr.Close())
|
|
merr.Add(pb.tombstones.Close())
|
|
|
|
return merr.Err()
|
|
}
|
|
|
|
func (pb *Block) String() string {
|
|
return pb.meta.ULID.String()
|
|
}
|
|
|
|
// Dir returns the directory of the block.
|
|
func (pb *Block) Dir() string { return pb.dir }
|
|
|
|
// Meta returns meta information about the block.
|
|
func (pb *Block) Meta() BlockMeta { return pb.meta }
|
|
|
|
// MinTime returns the min time of the meta.
|
|
func (pb *Block) MinTime() int64 { return pb.meta.MinTime }
|
|
|
|
// MaxTime returns the max time of the meta.
|
|
func (pb *Block) MaxTime() int64 { return pb.meta.MaxTime }
|
|
|
|
// Size returns the number of bytes that the block takes up.
|
|
func (pb *Block) Size() int64 {
|
|
return pb.numBytesChunks + pb.numBytesIndex + pb.numBytesTombstone + pb.numBytesMeta
|
|
}
|
|
|
|
// ErrClosing is returned when a block is in the process of being closed.
|
|
var ErrClosing = errors.New("block is closing")
|
|
|
|
func (pb *Block) startRead() error {
|
|
pb.mtx.RLock()
|
|
defer pb.mtx.RUnlock()
|
|
|
|
if pb.closing {
|
|
return ErrClosing
|
|
}
|
|
pb.pendingReaders.Add(1)
|
|
return nil
|
|
}
|
|
|
|
// Index returns a new IndexReader against the block data.
|
|
func (pb *Block) Index() (IndexReader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockIndexReader{ir: pb.indexr, b: pb}, nil
|
|
}
|
|
|
|
// Chunks returns a new ChunkReader against the block data.
|
|
func (pb *Block) Chunks() (ChunkReader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockChunkReader{ChunkReader: pb.chunkr, b: pb}, nil
|
|
}
|
|
|
|
// Tombstones returns a new TombstoneReader against the block data.
|
|
func (pb *Block) Tombstones() (tombstones.Reader, error) {
|
|
if err := pb.startRead(); err != nil {
|
|
return nil, err
|
|
}
|
|
return blockTombstoneReader{Reader: pb.tombstones, b: pb}, nil
|
|
}
|
|
|
|
// GetSymbolTableSize returns the Symbol Table Size in the index of this block.
|
|
func (pb *Block) GetSymbolTableSize() uint64 {
|
|
return pb.symbolTableSize
|
|
}
|
|
|
|
func (pb *Block) setCompactionFailed() error {
|
|
pb.meta.Compaction.Failed = true
|
|
n, err := writeMetaFile(pb.logger, pb.dir, &pb.meta)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesMeta = n
|
|
return nil
|
|
}
|
|
|
|
type blockIndexReader struct {
|
|
ir IndexReader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
|
|
s, err := r.ir.Symbols()
|
|
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
|
|
}
|
|
|
|
func (r blockIndexReader) LabelValues(names ...string) (index.StringTuples, error) {
|
|
st, err := r.ir.LabelValues(names...)
|
|
return st, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
|
|
}
|
|
|
|
func (r blockIndexReader) Postings(name string, values ...string) (index.Postings, error) {
|
|
p, err := r.ir.Postings(name, values...)
|
|
if err != nil {
|
|
return p, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings {
|
|
return r.ir.SortedPostings(p)
|
|
}
|
|
|
|
func (r blockIndexReader) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error {
|
|
if err := r.ir.Series(ref, lset, chks); err != nil {
|
|
return errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r blockIndexReader) LabelNames() ([]string, error) {
|
|
return r.b.LabelNames()
|
|
}
|
|
|
|
func (r blockIndexReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
type blockTombstoneReader struct {
|
|
tombstones.Reader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockTombstoneReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
type blockChunkReader struct {
|
|
ChunkReader
|
|
b *Block
|
|
}
|
|
|
|
func (r blockChunkReader) Close() error {
|
|
r.b.pendingReaders.Done()
|
|
return nil
|
|
}
|
|
|
|
// Delete matching series between mint and maxt in the block.
|
|
func (pb *Block) Delete(mint, maxt int64, ms ...*labels.Matcher) error {
|
|
pb.mtx.Lock()
|
|
defer pb.mtx.Unlock()
|
|
|
|
if pb.closing {
|
|
return ErrClosing
|
|
}
|
|
|
|
p, err := PostingsForMatchers(pb.indexr, ms...)
|
|
if err != nil {
|
|
return errors.Wrap(err, "select series")
|
|
}
|
|
|
|
ir := pb.indexr
|
|
|
|
// Choose only valid postings which have chunks in the time-range.
|
|
stones := tombstones.NewMemTombstones()
|
|
|
|
var lset labels.Labels
|
|
var chks []chunks.Meta
|
|
|
|
Outer:
|
|
for p.Next() {
|
|
err := ir.Series(p.At(), &lset, &chks)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, chk := range chks {
|
|
if chk.OverlapsClosedInterval(mint, maxt) {
|
|
// Delete only until the current values and not beyond.
|
|
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime)
|
|
stones.AddInterval(p.At(), tombstones.Interval{Mint: tmin, Maxt: tmax})
|
|
continue Outer
|
|
}
|
|
}
|
|
}
|
|
|
|
if p.Err() != nil {
|
|
return p.Err()
|
|
}
|
|
|
|
err = pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error {
|
|
for _, iv := range ivs {
|
|
stones.AddInterval(id, iv)
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.tombstones = stones
|
|
pb.meta.Stats.NumTombstones = pb.tombstones.Total()
|
|
|
|
n, err := tombstones.WriteFile(pb.logger, pb.dir, pb.tombstones)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesTombstone = n
|
|
n, err = writeMetaFile(pb.logger, pb.dir, &pb.meta)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pb.numBytesMeta = n
|
|
return nil
|
|
}
|
|
|
|
// CleanTombstones will remove the tombstones and rewrite the block (only if there are any tombstones).
|
|
// If there was a rewrite, then it returns the ULID of the new block written, else nil.
|
|
func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) {
|
|
numStones := 0
|
|
|
|
if err := pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error {
|
|
numStones += len(ivs)
|
|
return nil
|
|
}); err != nil {
|
|
// This should never happen, as the iteration function only returns nil.
|
|
panic(err)
|
|
}
|
|
if numStones == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
meta := pb.Meta()
|
|
uid, err := c.Write(dest, pb, pb.meta.MinTime, pb.meta.MaxTime, &meta)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &uid, nil
|
|
}
|
|
|
|
// Snapshot creates snapshot of the block into dir.
|
|
func (pb *Block) Snapshot(dir string) error {
|
|
blockDir := filepath.Join(dir, pb.meta.ULID.String())
|
|
if err := os.MkdirAll(blockDir, 0777); err != nil {
|
|
return errors.Wrap(err, "create snapshot block dir")
|
|
}
|
|
|
|
chunksDir := chunkDir(blockDir)
|
|
if err := os.MkdirAll(chunksDir, 0777); err != nil {
|
|
return errors.Wrap(err, "create snapshot chunk dir")
|
|
}
|
|
|
|
// Hardlink meta, index and tombstones
|
|
for _, fname := range []string{
|
|
metaFilename,
|
|
indexFilename,
|
|
tombstones.TombstonesFilename,
|
|
} {
|
|
if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil {
|
|
return errors.Wrapf(err, "create snapshot %s", fname)
|
|
}
|
|
}
|
|
|
|
// Hardlink the chunks
|
|
curChunkDir := chunkDir(pb.dir)
|
|
files, err := ioutil.ReadDir(curChunkDir)
|
|
if err != nil {
|
|
return errors.Wrap(err, "ReadDir the current chunk dir")
|
|
}
|
|
|
|
for _, f := range files {
|
|
err := os.Link(filepath.Join(curChunkDir, f.Name()), filepath.Join(chunksDir, f.Name()))
|
|
if err != nil {
|
|
return errors.Wrap(err, "hardlink a chunk")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// OverlapsClosedInterval returns true if the block overlaps [mint, maxt].
|
|
func (pb *Block) OverlapsClosedInterval(mint, maxt int64) bool {
|
|
// The block itself is a half-open interval
|
|
// [pb.meta.MinTime, pb.meta.MaxTime).
|
|
return pb.meta.MinTime <= maxt && mint < pb.meta.MaxTime
|
|
}
|
|
|
|
// LabelNames returns all the unique label names present in the Block in sorted order.
|
|
func (pb *Block) LabelNames() ([]string, error) {
|
|
return pb.indexr.LabelNames()
|
|
}
|
|
|
|
// clampInterval restricts the interval [a, b] to the bounds [mint, maxt]:
// the returned lower end is never below mint and the returned upper end is
// never above maxt. Ends that already lie within the bounds are returned
// unchanged.
func clampInterval(a, b, mint, maxt int64) (int64, int64) {
	lo, hi := a, b
	if mint > lo {
		lo = mint
	}
	if maxt < hi {
		hi = maxt
	}
	return lo, hi
}
|