From a007eb2e1eeb3e42d504adb9bcd1e5cca804d524 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Thu, 7 Sep 2017 14:14:33 +0200 Subject: [PATCH] vendor: update prometheus/tsdb to single head mode --- vendor/github.com/go-kit/kit/log/term/LICENSE | 21 + .../go-kit/kit/log/term/colorlogger.go | 144 ++ .../go-kit/kit/log/term/colorwriter_others.go | 12 + .../kit/log/term/colorwriter_windows.go | 190 +++ vendor/github.com/go-kit/kit/log/term/term.go | 22 + .../go-kit/kit/log/term/terminal_appengine.go | 15 + .../go-kit/kit/log/term/terminal_darwin.go | 10 + .../go-kit/kit/log/term/terminal_freebsd.go | 7 + .../go-kit/kit/log/term/terminal_linux.go | 12 + .../kit/log/term/terminal_notwindows.go | 25 + .../go-kit/kit/log/term/terminal_openbsd.go | 5 + .../go-kit/kit/log/term/terminal_windows.go | 102 ++ vendor/github.com/prometheus/tsdb/block.go | 49 +- vendor/github.com/prometheus/tsdb/chunks.go | 11 +- vendor/github.com/prometheus/tsdb/compact.go | 266 ++-- vendor/github.com/prometheus/tsdb/db.go | 557 ++----- .../prometheus/tsdb/encoding_helpers.go | 2 +- vendor/github.com/prometheus/tsdb/head.go | 1300 +++++++++++------ vendor/github.com/prometheus/tsdb/index.go | 20 +- vendor/github.com/prometheus/tsdb/pool.go | 79 + vendor/github.com/prometheus/tsdb/postings.go | 74 +- vendor/github.com/prometheus/tsdb/querier.go | 73 +- .../github.com/prometheus/tsdb/tombstones.go | 79 +- vendor/github.com/prometheus/tsdb/wal.go | 951 +++++++----- vendor/vendor.json | 14 +- 25 files changed, 2562 insertions(+), 1478 deletions(-) create mode 100644 vendor/github.com/go-kit/kit/log/term/LICENSE create mode 100644 vendor/github.com/go-kit/kit/log/term/colorlogger.go create mode 100644 vendor/github.com/go-kit/kit/log/term/colorwriter_others.go create mode 100644 vendor/github.com/go-kit/kit/log/term/colorwriter_windows.go create mode 100644 vendor/github.com/go-kit/kit/log/term/term.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_appengine.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_darwin.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_freebsd.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_linux.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_notwindows.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_openbsd.go create mode 100644 vendor/github.com/go-kit/kit/log/term/terminal_windows.go create mode 100644 vendor/github.com/prometheus/tsdb/pool.go diff --git a/vendor/github.com/go-kit/kit/log/term/LICENSE b/vendor/github.com/go-kit/kit/log/term/LICENSE new file mode 100644 index 0000000000..f090cb42f3 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Simon Eskildsen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/github.com/go-kit/kit/log/term/colorlogger.go b/vendor/github.com/go-kit/kit/log/term/colorlogger.go new file mode 100644 index 0000000000..00376ce0e7 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/colorlogger.go @@ -0,0 +1,144 @@ +package term + +import ( + "bytes" + "fmt" + "io" + "sync" + + "github.com/go-kit/kit/log" +) + +// Color represents an ANSI color. The zero value is Default. +type Color uint8 + +// ANSI colors. +const ( + Default = Color(iota) + + Black + DarkRed + DarkGreen + Brown + DarkBlue + DarkMagenta + DarkCyan + Gray + + DarkGray + Red + Green + Yellow + Blue + Magenta + Cyan + White + + numColors +) + +// For more on ANSI escape codes see +// https://en.wikipedia.org/wiki/ANSI_escape_code. See in particular +// https://en.wikipedia.org/wiki/ANSI_escape_code#Colors. + +var ( + resetColorBytes = []byte("\x1b[39;49;22m") + fgColorBytes [][]byte + bgColorBytes [][]byte +) + +func init() { + // Default + fgColorBytes = append(fgColorBytes, []byte("\x1b[39m")) + bgColorBytes = append(bgColorBytes, []byte("\x1b[49m")) + + // dark colors + for color := Black; color < DarkGray; color++ { + fgColorBytes = append(fgColorBytes, []byte(fmt.Sprintf("\x1b[%dm", 30+color-Black))) + bgColorBytes = append(bgColorBytes, []byte(fmt.Sprintf("\x1b[%dm", 40+color-Black))) + } + + // bright colors + for color := DarkGray; color < numColors; color++ { + fgColorBytes = append(fgColorBytes, []byte(fmt.Sprintf("\x1b[%d;1m", 30+color-DarkGray))) + bgColorBytes = append(bgColorBytes, []byte(fmt.Sprintf("\x1b[%d;1m", 40+color-DarkGray))) + } +} + +// FgBgColor represents a foreground and background color. +type FgBgColor struct { + Fg, Bg Color +} + +func (c FgBgColor) isZero() bool { + return c.Fg == Default && c.Bg == Default +} + +// NewColorLogger returns a Logger which writes colored logs to w. ANSI color +// codes for the colors returned by color are added to the formatted output +// from the Logger returned by newLogger and the combined result written to w. +func NewColorLogger(w io.Writer, newLogger func(io.Writer) log.Logger, color func(keyvals ...interface{}) FgBgColor) log.Logger { + if color == nil { + panic("color func nil") + } + return &colorLogger{ + w: w, + newLogger: newLogger, + color: color, + bufPool: sync.Pool{New: func() interface{} { return &loggerBuf{} }}, + noColorLogger: newLogger(w), + } +} + +type colorLogger struct { + w io.Writer + newLogger func(io.Writer) log.Logger + color func(keyvals ...interface{}) FgBgColor + bufPool sync.Pool + noColorLogger log.Logger +} + +func (l *colorLogger) Log(keyvals ...interface{}) error { + color := l.color(keyvals...) + if color.isZero() { + return l.noColorLogger.Log(keyvals...) + } + + lb := l.getLoggerBuf() + defer l.putLoggerBuf(lb) + if color.Fg != Default { + lb.buf.Write(fgColorBytes[color.Fg]) + } + if color.Bg != Default { + lb.buf.Write(bgColorBytes[color.Bg]) + } + err := lb.logger.Log(keyvals...) 
+ if err != nil { + return err + } + if color.Fg != Default || color.Bg != Default { + lb.buf.Write(resetColorBytes) + } + _, err = io.Copy(l.w, lb.buf) + return err +} + +type loggerBuf struct { + buf *bytes.Buffer + logger log.Logger +} + +func (l *colorLogger) getLoggerBuf() *loggerBuf { + lb := l.bufPool.Get().(*loggerBuf) + if lb.buf == nil { + lb.buf = &bytes.Buffer{} + lb.logger = l.newLogger(lb.buf) + } else { + lb.buf.Reset() + } + return lb +} + +func (l *colorLogger) putLoggerBuf(cb *loggerBuf) { + l.bufPool.Put(cb) +} diff --git a/vendor/github.com/go-kit/kit/log/term/colorwriter_others.go b/vendor/github.com/go-kit/kit/log/term/colorwriter_others.go new file mode 100644 index 0000000000..cc571024b1 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/colorwriter_others.go @@ -0,0 +1,12 @@ +// +build !windows + +package term + +import "io" + +// NewColorWriter returns an io.Writer that writes to w and provides cross +// platform support for ANSI color codes. If w is not a terminal it is +// returned unmodified. +func NewColorWriter(w io.Writer) io.Writer { + return w +} diff --git a/vendor/github.com/go-kit/kit/log/term/colorwriter_windows.go b/vendor/github.com/go-kit/kit/log/term/colorwriter_windows.go new file mode 100644 index 0000000000..fcacda3a63 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/colorwriter_windows.go @@ -0,0 +1,190 @@ +// The code in this file is adapted from github.com/mattn/go-colorable. + +// +build windows + +package term + +import ( + "bytes" + "fmt" + "io" + "strconv" + "strings" + "syscall" + "unsafe" +) + +type colorWriter struct { + out io.Writer + handle syscall.Handle + lastbuf bytes.Buffer + oldattr word +} + +// NewColorWriter returns an io.Writer that writes to w and provides cross +// platform support for ANSI color codes. If w is not a terminal it is +// returned unmodified. 
+func NewColorWriter(w io.Writer) io.Writer { + if !IsConsole(w) { + return w + } + + var csbi consoleScreenBufferInfo + handle := syscall.Handle(w.(fder).Fd()) + procGetConsoleScreenBufferInfo.Call(uintptr(handle), uintptr(unsafe.Pointer(&csbi))) + + return &colorWriter{ + out: w, + handle: handle, + oldattr: csbi.attributes, + } +} + +func (w *colorWriter) Write(data []byte) (n int, err error) { + var csbi consoleScreenBufferInfo + procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi))) + + er := bytes.NewBuffer(data) +loop: + for { + r1, _, err := procGetConsoleScreenBufferInfo.Call(uintptr(w.handle), uintptr(unsafe.Pointer(&csbi))) + if r1 == 0 { + break loop + } + + c1, _, err := er.ReadRune() + if err != nil { + break loop + } + if c1 != 0x1b { + fmt.Fprint(w.out, string(c1)) + continue + } + c2, _, err := er.ReadRune() + if err != nil { + w.lastbuf.WriteRune(c1) + break loop + } + if c2 != 0x5b { + w.lastbuf.WriteRune(c1) + w.lastbuf.WriteRune(c2) + continue + } + + var buf bytes.Buffer + var m rune + for { + c, _, err := er.ReadRune() + if err != nil { + w.lastbuf.WriteRune(c1) + w.lastbuf.WriteRune(c2) + w.lastbuf.Write(buf.Bytes()) + break loop + } + if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '@' { + m = c + break + } + buf.Write([]byte(string(c))) + } + + switch m { + case 'm': + attr := csbi.attributes + cs := buf.String() + if cs == "" { + procSetConsoleTextAttribute.Call(uintptr(w.handle), uintptr(w.oldattr)) + continue + } + token := strings.Split(cs, ";") + intensityMode := word(0) + for _, ns := range token { + if n, err = strconv.Atoi(ns); err == nil { + switch { + case n == 0: + attr = w.oldattr + case n == 1: + attr |= intensityMode + case 30 <= n && n <= 37: + attr = (attr & backgroundMask) + if (n-30)&1 != 0 { + attr |= foregroundRed + } + if (n-30)&2 != 0 { + attr |= foregroundGreen + } + if (n-30)&4 != 0 { + attr |= foregroundBlue + } + intensityMode = foregroundIntensity + case n == 39: // reset foreground color + attr &= backgroundMask + attr |= w.oldattr & foregroundMask + case 40 <= n && n <= 47: + attr = (attr & foregroundMask) + if (n-40)&1 != 0 { + attr |= backgroundRed + } + if (n-40)&2 != 0 { + attr |= backgroundGreen + } + if (n-40)&4 != 0 { + attr |= backgroundBlue + } + intensityMode = backgroundIntensity + case n == 49: // reset background color + attr &= foregroundMask + attr |= w.oldattr & backgroundMask + } + procSetConsoleTextAttribute.Call(uintptr(w.handle), uintptr(attr)) + } + } + } + } + return len(data) - w.lastbuf.Len(), nil +} + +var ( + procGetConsoleScreenBufferInfo = kernel32.NewProc("GetConsoleScreenBufferInfo") + procSetConsoleTextAttribute = kernel32.NewProc("SetConsoleTextAttribute") +) + +const ( + foregroundBlue = 0x1 + foregroundGreen = 0x2 + foregroundRed = 0x4 + foregroundIntensity = 0x8 + foregroundMask = (foregroundRed | foregroundBlue | foregroundGreen | foregroundIntensity) + backgroundBlue = 0x10 + backgroundGreen = 0x20 + backgroundRed = 0x40 + backgroundIntensity = 0x80 + backgroundMask = (backgroundRed | backgroundBlue | backgroundGreen | backgroundIntensity) +) + +type ( + wchar uint16 + short int16 + dword uint32 + word uint16 +) + +type coord struct { + x short + y short +} + +type smallRect struct { + left short + top short + right short + bottom short +} + +type consoleScreenBufferInfo struct { + size coord + cursorPosition coord + attributes word + window smallRect + maximumWindowSize coord +} diff --git a/vendor/github.com/go-kit/kit/log/term/term.go 
b/vendor/github.com/go-kit/kit/log/term/term.go new file mode 100644 index 0000000000..3965f1c8be --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/term.go @@ -0,0 +1,22 @@ +// Package term provides tools for logging to a terminal. +package term + +import ( + "io" + + "github.com/go-kit/kit/log" +) + +// NewLogger returns a Logger that takes advantage of terminal features if +// possible. Log events are formatted by the Logger returned by newLogger. If +// w is a terminal each log event is colored according to the color function. +func NewLogger(w io.Writer, newLogger func(io.Writer) log.Logger, color func(keyvals ...interface{}) FgBgColor) log.Logger { + if !IsTerminal(w) { + return newLogger(w) + } + return NewColorLogger(NewColorWriter(w), newLogger, color) +} + +type fder interface { + Fd() uintptr +} diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_appengine.go b/vendor/github.com/go-kit/kit/log/term/terminal_appengine.go new file mode 100644 index 0000000000..b023121ae7 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_appengine.go @@ -0,0 +1,15 @@ +// Based on ssh/terminal: +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build appengine + +package term + +import "io" + +// IsTerminal always returns false on AppEngine. +func IsTerminal(w io.Writer) bool { + return false +} diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_darwin.go b/vendor/github.com/go-kit/kit/log/term/terminal_darwin.go new file mode 100644 index 0000000000..459cf54ab9 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_darwin.go @@ -0,0 +1,10 @@ +// Based on ssh/terminal: +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package term + +import "syscall" + +const ioctlReadTermios = syscall.TIOCGETA diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_freebsd.go b/vendor/github.com/go-kit/kit/log/term/terminal_freebsd.go new file mode 100644 index 0000000000..791d5c69e3 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_freebsd.go @@ -0,0 +1,7 @@ +package term + +import ( + "syscall" +) + +const ioctlReadTermios = syscall.TIOCGETA diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_linux.go b/vendor/github.com/go-kit/kit/log/term/terminal_linux.go new file mode 100644 index 0000000000..ffeab4d15c --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_linux.go @@ -0,0 +1,12 @@ +// Based on ssh/terminal: +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine + +package term + +import "syscall" + +const ioctlReadTermios = syscall.TCGETS diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_notwindows.go b/vendor/github.com/go-kit/kit/log/term/terminal_notwindows.go new file mode 100644 index 0000000000..9c72558c1c --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_notwindows.go @@ -0,0 +1,25 @@ +// Based on ssh/terminal: +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build linux,!appengine darwin freebsd openbsd + +package term + +import ( + "io" + "syscall" + "unsafe" +) + +// IsTerminal returns true if w writes to a terminal. +func IsTerminal(w io.Writer) bool { + fw, ok := w.(fder) + if !ok { + return false + } + var termios syscall.Termios + _, _, err := syscall.Syscall6(syscall.SYS_IOCTL, fw.Fd(), ioctlReadTermios, uintptr(unsafe.Pointer(&termios)), 0, 0, 0) + return err == 0 +} diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_openbsd.go b/vendor/github.com/go-kit/kit/log/term/terminal_openbsd.go new file mode 100644 index 0000000000..f9931666bd --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_openbsd.go @@ -0,0 +1,5 @@ +package term + +import "syscall" + +const ioctlReadTermios = syscall.TIOCGETA diff --git a/vendor/github.com/go-kit/kit/log/term/terminal_windows.go b/vendor/github.com/go-kit/kit/log/term/terminal_windows.go new file mode 100644 index 0000000000..753fd12d86 --- /dev/null +++ b/vendor/github.com/go-kit/kit/log/term/terminal_windows.go @@ -0,0 +1,102 @@ +// Based on ssh/terminal: +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build windows + +package term + +import ( + "encoding/binary" + "io" + "regexp" + "syscall" + "unsafe" +) + +var kernel32 = syscall.NewLazyDLL("kernel32.dll") + +var ( + procGetFileInformationByHandleEx = kernel32.NewProc("GetFileInformationByHandleEx") + msysPipeNameRegex = regexp.MustCompile(`\\(cygwin|msys)-\w+-pty\d?-(to|from)-master`) +) + +const ( + fileNameInfo = 0x02 +) + +// IsTerminal returns true if w writes to a terminal. +func IsTerminal(w io.Writer) bool { + return IsConsole(w) || IsMSYSTerminal(w) +} + +// IsConsole returns true if w writes to a Windows console. +func IsConsole(w io.Writer) bool { + var handle syscall.Handle + + if fw, ok := w.(fder); ok { + handle = syscall.Handle(fw.Fd()) + } else { + // The writer has no file-descriptor and so can't be a terminal. + return false + } + + var st uint32 + err := syscall.GetConsoleMode(handle, &st) + + // If the handle is attached to a terminal, GetConsoleMode returns a + // non-zero value containing the console mode flags. We don't care about + // the specifics of flags, just that it is not zero. + return (err == nil && st != 0) +} + +// IsMSYSTerminal returns true if w writes to a MSYS/MSYS2 terminal. +func IsMSYSTerminal(w io.Writer) bool { + var handle syscall.Handle + + if fw, ok := w.(fder); ok { + handle = syscall.Handle(fw.Fd()) + } else { + // The writer has no file-descriptor and so can't be a terminal. + return false + } + + // MSYS(2) terminal reports as a pipe for STDIN/STDOUT/STDERR. If it isn't + // a pipe, it can't be a MSYS(2) terminal. + filetype, err := syscall.GetFileType(handle) + + if filetype != syscall.FILE_TYPE_PIPE || err != nil { + return false + } + + // MSYS2/Cygwin terminal's name looks like: \msys-dd50a72ab4668b33-pty2-to-master + data := make([]byte, 256, 256) + + r, _, e := syscall.Syscall6( + procGetFileInformationByHandleEx.Addr(), + 4, + uintptr(handle), + uintptr(fileNameInfo), + uintptr(unsafe.Pointer(&data[0])), + uintptr(len(data)), + 0, + 0, + ) + + if r != 0 && e == 0 { + // The first 4 bytes of the buffer are the size of the UTF16 name, in bytes. 
+ unameLen := binary.LittleEndian.Uint32(data[:4]) / 2 + uname := make([]uint16, unameLen, unameLen) + + for i := uint32(0); i < unameLen; i++ { + uname[i] = binary.LittleEndian.Uint16(data[i*2+4 : i*2+2+4]) + } + + name := syscall.UTF16ToString(uname) + + return msysPipeNameRegex.MatchString(name) + } + + return false +} diff --git a/vendor/github.com/prometheus/tsdb/block.go b/vendor/github.com/prometheus/tsdb/block.go index bc9f581ab7..67cd574918 100644 --- a/vendor/github.com/prometheus/tsdb/block.go +++ b/vendor/github.com/prometheus/tsdb/block.go @@ -26,14 +26,23 @@ import ( "github.com/prometheus/tsdb/labels" ) -// DiskBlock handles reads against a Block of time series data. type DiskBlock interface { + BlockReader + // Directory where block data is stored. Dir() string // Stats returns statistics about the block. Meta() BlockMeta + Delete(mint, maxt int64, m ...labels.Matcher) error + + Snapshot(dir string) error + + Close() error +} + +type BlockReader interface { // Index returns an IndexReader over the block's data. Index() IndexReader @@ -42,30 +51,6 @@ type DiskBlock interface { // Chunks returns a ChunkReader over the block's data. Chunks() ChunkReader // Tombstones returns a TombstoneReader over the block's deleted data. Tombstones() TombstoneReader - - // Delete deletes data from the block. - Delete(mint, maxt int64, ms ...labels.Matcher) error - - // Close releases all underlying resources of the block. - Close() error -} - -// Block is an interface to a DiskBlock that can also be queried. -type Block interface { - DiskBlock - Queryable - Snapshottable -} - -// headBlock is a regular block that can still be appended to. -type headBlock interface { - Block - Appendable - - // ActiveWriters returns the number of currently active appenders. - ActiveWriters() int - // HighTimestamp returns the highest currently inserted timestamp. - HighTimestamp() int64 } // Snapshottable defines an entity that can be backed up online. @@ -225,16 +210,6 @@ func (pb *persistedBlock) String() string { return pb.meta.ULID.String() } -func (pb *persistedBlock) Querier(mint, maxt int64) Querier { - return &blockQuerier{ - mint: mint, - maxt: maxt, - index: pb.Index(), - chunks: pb.Chunks(), - tombstones: pb.Tombstones(), - } -} - func (pb *persistedBlock) Dir() string { return pb.dir } func (pb *persistedBlock) Index() IndexReader { return pb.indexr } func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr } @@ -250,7 +225,7 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - stones := map[uint32]intervals{} + stones := map[uint64]Intervals{} var lset labels.Labels var chks []ChunkMeta @@ -272,7 +247,7 @@ Outer: if intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) { // Delete only until the current values and not beyond.
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime) - stones[p.At()] = intervals{{tmin, tmax}} + stones[p.At()] = Intervals{{tmin, tmax}} continue Outer } } diff --git a/vendor/github.com/prometheus/tsdb/chunks.go b/vendor/github.com/prometheus/tsdb/chunks.go index 6bed69700f..5955c50851 100644 --- a/vendor/github.com/prometheus/tsdb/chunks.go +++ b/vendor/github.com/prometheus/tsdb/chunks.go @@ -18,7 +18,6 @@ import ( "encoding/binary" "fmt" "hash" - "hash/crc32" "io" "os" @@ -59,7 +58,7 @@ func (cm *ChunkMeta) writeHash(h hash.Hash) error { type deletedIterator struct { it chunks.Iterator - intervals intervals + intervals Intervals } func (it *deletedIterator) At() (int64, float64) { @@ -76,7 +75,7 @@ Outer: continue Outer } - if ts > tr.maxt { + if ts > tr.Maxt { it.intervals = it.intervals[1:] continue } @@ -136,7 +135,7 @@ func newChunkWriter(dir string) (*chunkWriter, error) { cw := &chunkWriter{ dirFile: dirFile, n: 0, - crc32: crc32.New(crc32.MakeTable(crc32.Castagnoli)), + crc32: newCRC32(), segmentSize: defaultChunkSegmentSize, } return cw, nil @@ -180,7 +179,7 @@ func (w *chunkWriter) cut() error { return err } - p, _, err := nextSequenceFile(w.dirFile.Name(), "") + p, _, err := nextSequenceFile(w.dirFile.Name()) if err != nil { return err } @@ -303,7 +302,7 @@ type chunkReader struct { // newChunkReader returns a new chunkReader based on mmaped files found in dir. func newChunkReader(dir string, pool chunks.Pool) (*chunkReader, error) { - files, err := sequenceFiles(dir, "") + files, err := sequenceFiles(dir) if err != nil { return nil, err } diff --git a/vendor/github.com/prometheus/tsdb/compact.go b/vendor/github.com/prometheus/tsdb/compact.go index dc803ef8f0..7d8174f0d3 100644 --- a/vendor/github.com/prometheus/tsdb/compact.go +++ b/vendor/github.com/prometheus/tsdb/compact.go @@ -14,10 +14,10 @@ package tsdb import ( - "fmt" "math/rand" "os" "path/filepath" + "runtime" "sort" "time" @@ -51,7 +51,7 @@ type Compactor interface { Plan(dir string) ([]string, error) // Write persists a Block into a directory. - Write(dest string, b Block) error + Write(dest string, b BlockReader, mint, maxt int64) error // Compact runs compaction against the provided directories. Must // only be called concurrently with results of Plan(). @@ -60,16 +60,20 @@ type Compactor interface { // LeveledCompactor implements the Compactor interface. 
type LeveledCompactor struct { - dir string - metrics *compactorMetrics - logger log.Logger - opts *LeveledCompactorOptions + dir string + metrics *compactorMetrics + logger log.Logger + ranges []int64 + chunkPool chunks.Pool } type compactorMetrics struct { - ran prometheus.Counter - failed prometheus.Counter - duration prometheus.Histogram + ran prometheus.Counter + failed prometheus.Counter + duration prometheus.Histogram + chunkSize prometheus.Histogram + chunkSamples prometheus.Histogram + chunkRange prometheus.Histogram } func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics { @@ -83,9 +87,25 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics { Name: "tsdb_compactions_failed_total", Help: "Total number of compactions that failed for the partition.", }) - m.duration = prometheus.NewSummary(prometheus.SummaryOpts{ - Name: "tsdb_compaction_duration", - Help: "Duration of compaction runs.", + m.duration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "tsdb_compaction_duration", + Help: "Duration of compaction runs.", + Buckets: prometheus.ExponentialBuckets(1, 2, 10), + }) + m.chunkSize = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "tsdb_compaction_chunk_size", + Help: "Final size of chunks on their first compaction", + Buckets: prometheus.ExponentialBuckets(32, 1.5, 12), + }) + m.chunkSamples = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "tsdb_compaction_chunk_samples", + Help: "Final number of samples on their first compaction", + Buckets: prometheus.ExponentialBuckets(4, 1.5, 12), + }) + m.chunkRange = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "tsdb_compaction_chunk_range", + Help: "Final time range of chunks on their first compaction", + Buckets: prometheus.ExponentialBuckets(100, 4, 10), }) if r != nil { @@ -93,39 +113,30 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics { m.ran, m.failed, m.duration, + m.chunkRange, + m.chunkSamples, + m.chunkSize, ) } return m } -// LeveledCompactorOptions are the options for a LeveledCompactor. -type LeveledCompactorOptions struct { - blockRanges []int64 - chunkPool chunks.Pool -} - // NewLeveledCompactor returns a LeveledCompactor. 
-func NewLeveledCompactor(r prometheus.Registerer, l log.Logger, opts *LeveledCompactorOptions) *LeveledCompactor { - if opts == nil { - opts = &LeveledCompactorOptions{ - chunkPool: chunks.NewPool(), - } +func NewLeveledCompactor(r prometheus.Registerer, l log.Logger, ranges []int64, pool chunks.Pool) (*LeveledCompactor, error) { + if len(ranges) == 0 { + return nil, errors.Errorf("at least one range must be provided") + } + if pool == nil { + pool = chunks.NewPool() } return &LeveledCompactor{ - opts: opts, - logger: l, - metrics: newCompactorMetrics(r), - } + ranges: ranges, + chunkPool: pool, + logger: l, + metrics: newCompactorMetrics(r), + }, nil } -type compactionInfo struct { - seq int - generation int - mint, maxt int64 -} - -const compactionBlocksLen = 3 - type dirMeta struct { dir string meta *BlockMeta @@ -145,21 +156,15 @@ func (c *LeveledCompactor) Plan(dir string) ([]string, error) { if err != nil { return nil, err } - if meta.Compaction.Level > 0 { - dms = append(dms, dirMeta{dir, meta}) - } + dms = append(dms, dirMeta{dir, meta}) } - sort.Slice(dms, func(i, j int) bool { - return dms[i].meta.MinTime < dms[j].meta.MinTime - }) - return c.plan(dms) } func (c *LeveledCompactor) plan(dms []dirMeta) ([]string, error) { - if len(dms) <= 1 { - return nil, nil - } + sort.Slice(dms, func(i, j int) bool { + return dms[i].meta.MinTime < dms[j].meta.MinTime + }) var res []string for _, dm := range c.selectDirs(dms) { @@ -172,11 +177,11 @@ func (c *LeveledCompactor) plan(dms []dirMeta) ([]string, error) { // Compact any blocks that have >5% tombstones. for i := len(dms) - 1; i >= 0; i-- { meta := dms[i].meta - if meta.MaxTime-meta.MinTime < c.opts.blockRanges[len(c.opts.blockRanges)/2] { + if meta.MaxTime-meta.MinTime < c.ranges[len(c.ranges)/2] { break } - if meta.Stats.NumSeries/(meta.Stats.NumTombstones+1) <= 20 { // 5% + if float64(meta.Stats.NumTombstones)/float64(meta.Stats.NumSeries+1) > 0.05 { return []string{dms[i].dir}, nil } } @@ -187,13 +192,13 @@ func (c *LeveledCompactor) plan(dms []dirMeta) ([]string, error) { // selectDirs returns the dir metas that should be compacted into a single new block. // If only a single block range is configured, the result is always nil. func (c *LeveledCompactor) selectDirs(ds []dirMeta) []dirMeta { - if len(c.opts.blockRanges) < 2 || len(ds) < 1 { + if len(c.ranges) < 2 || len(ds) < 1 { return nil } highTime := ds[len(ds)-1].meta.MinTime - for _, iv := range c.opts.blockRanges[1:] { + for _, iv := range c.ranges[1:] { parts := splitByRange(ds, iv) if len(parts) == 0 { continue @@ -258,9 +263,12 @@ func splitByRange(ds []dirMeta, tr int64) [][]dirMeta { return splitDirs } -func compactBlockMetas(blocks ...BlockMeta) (res BlockMeta) { - res.MinTime = blocks[0].MinTime - res.MaxTime = blocks[len(blocks)-1].MaxTime +func compactBlockMetas(uid ulid.ULID, blocks ...*BlockMeta) *BlockMeta { + res := &BlockMeta{ + ULID: uid, + MinTime: blocks[0].MinTime, + MaxTime: blocks[len(blocks)-1].MaxTime, + } sources := map[ulid.ULID]struct{}{} @@ -271,10 +279,6 @@ func compactBlockMetas(blocks ...BlockMeta) (res BlockMeta) { for _, s := range b.Compaction.Sources { sources[s] = struct{}{} } - // If it's an in memory block, its ULID goes into the sources. - if b.Compaction.Level == 0 { - sources[b.ULID] = struct{}{} - } } res.Compaction.Level++ @@ -291,40 +295,69 @@ func compactBlockMetas(blocks ...BlockMeta) (res BlockMeta) { // Compact creates a new block in the compactor's directory from the blocks in the // provided directories. 
func (c *LeveledCompactor) Compact(dest string, dirs ...string) (err error) { - var blocks []Block + var blocks []BlockReader + var metas []*BlockMeta for _, d := range dirs { - b, err := newPersistedBlock(d, c.opts.chunkPool) + b, err := newPersistedBlock(d, c.chunkPool) if err != nil { return err } defer b.Close() + meta, err := readMetaFile(d) + if err != nil { + return err + } + + metas = append(metas, meta) blocks = append(blocks, b) } entropy := rand.New(rand.NewSource(time.Now().UnixNano())) uid := ulid.MustNew(ulid.Now(), entropy) - return c.write(dest, uid, blocks...) + return c.write(dest, compactBlockMetas(uid, metas...), blocks...) } -func (c *LeveledCompactor) Write(dest string, b Block) error { - // Buffering blocks might have been created that often have no data. - if b.Meta().Stats.NumSeries == 0 { - return nil - } - +func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64) error { entropy := rand.New(rand.NewSource(time.Now().UnixNano())) uid := ulid.MustNew(ulid.Now(), entropy) - return c.write(dest, uid, b) + meta := &BlockMeta{ + ULID: uid, + MinTime: mint, + MaxTime: maxt, + } + meta.Compaction.Level = 1 + meta.Compaction.Sources = []ulid.ULID{uid} + + return c.write(dest, meta, b) +} + +// instrumentedChunkWriter is used for level 1 compactions to record statistics +// about compacted chunks. +type instrumentedChunkWriter struct { + ChunkWriter + + size prometheus.Histogram + samples prometheus.Histogram + trange prometheus.Histogram +} + +func (w *instrumentedChunkWriter) WriteChunks(chunks ...ChunkMeta) error { + for _, c := range chunks { + w.size.Observe(float64(len(c.Chunk.Bytes()))) + w.samples.Observe(float64(c.Chunk.NumSamples())) + w.trange.Observe(float64(c.MaxTime - c.MinTime)) + } + return w.ChunkWriter.WriteChunks(chunks...) } // write creates a new block that is the union of the provided blocks into dir. // It cleans up all files of the old blocks after completing successfully. -func (c *LeveledCompactor) write(dest string, uid ulid.ULID, blocks ...Block) (err error) { - c.logger.Log("msg", "compact blocks", "blocks", fmt.Sprintf("%v", blocks)) +func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockReader) (err error) { + c.logger.Log("msg", "compact blocks", "count", len(blocks), "mint", meta.MinTime, "maxt", meta.MaxTime) defer func(t time.Time) { if err != nil { @@ -332,9 +365,13 @@ func (c *LeveledCompactor) write(dest string, uid ulid.ULID, blocks ...Block) (e } c.metrics.ran.Inc() c.metrics.duration.Observe(time.Since(t).Seconds()) + + // We might have done quite a few allocs. Enforce a GC so they do not accumulate + // with subsequent compactions or head GCs. + runtime.GC() }(time.Now()) - dir := filepath.Join(dest, uid.String()) + dir := filepath.Join(dest, meta.ULID.String()) tmp := dir + ".tmp" if err = os.RemoveAll(tmp); err != nil { @@ -347,20 +384,30 @@ func (c *LeveledCompactor) write(dest string, uid ulid.ULID, blocks ...Block) (e // Populate chunk and index files into temporary directory with // data of all blocks. - chunkw, err := newChunkWriter(chunkDir(tmp)) + var chunkw ChunkWriter + + chunkw, err = newChunkWriter(chunkDir(tmp)) if err != nil { return errors.Wrap(err, "open chunk writer") } + // Record written chunk sizes on level 1 compactions. 
+ if meta.Compaction.Level == 1 { + chunkw = &instrumentedChunkWriter{ + ChunkWriter: chunkw, + size: c.metrics.chunkSize, + samples: c.metrics.chunkSamples, + trange: c.metrics.chunkRange, + } + } + indexw, err := newIndexWriter(tmp) if err != nil { return errors.Wrap(err, "open index writer") } - meta, err := c.populateBlock(blocks, indexw, chunkw) - if err != nil { + if err := c.populateBlock(blocks, meta, indexw, chunkw); err != nil { return errors.Wrap(err, "write compaction") } - meta.ULID = uid if err = writeMetaFile(tmp, meta); err != nil { return errors.Wrap(err, "write merged meta") @@ -398,18 +445,16 @@ func (c *LeveledCompactor) write(dest string, uid ulid.ULID, blocks ...Block) (e // populateBlock fills the index and chunk writers with new data gathered as the union // of the provided blocks. It returns meta information for the new block. -func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*BlockMeta, error) { +func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter) error { var ( set compactionSet - metas []BlockMeta allSymbols = make(map[string]struct{}, 1<<16) ) for i, b := range blocks { - metas = append(metas, b.Meta()) symbols, err := b.Index().Symbols() if err != nil { - return nil, errors.Wrap(err, "read symbols") + return errors.Wrap(err, "read symbols") } for s := range symbols { allSymbols[s] = struct{}{} @@ -419,7 +464,7 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu all, err := indexr.Postings("", "") if err != nil { - return nil, err + return err } all = indexr.SortedPostings(all) @@ -431,20 +476,19 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu } set, err = newCompactionMerger(set, s) if err != nil { - return nil, err + return err } } // We fully rebuild the postings list index from merged series. var ( - postings = &memPostings{m: make(map[term][]uint32, 512)} + postings = newMemPostings() values = map[string]stringset{} - i = uint32(0) - meta = compactBlockMetas(metas...) + i = uint64(0) ) if err := indexw.AddSymbols(allSymbols); err != nil { - return nil, errors.Wrap(err, "add symbols") + return errors.Wrap(err, "add symbols") } for set.Next() { @@ -458,11 +502,11 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu if len(dranges) > 0 { // Re-encode the chunk to not have deleted values. 
for _, chk := range chks { - if intervalOverlap(dranges[0].mint, dranges[len(dranges)-1].maxt, chk.MinTime, chk.MaxTime) { + if intervalOverlap(dranges[0].Mint, dranges[len(dranges)-1].Maxt, chk.MinTime, chk.MaxTime) { newChunk := chunks.NewXORChunk() app, err := newChunk.Appender() if err != nil { - return nil, err + return err } it := &deletedIterator{it: chk.Chunk.Iterator(), intervals: dranges} @@ -476,11 +520,11 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu } } if err := chunkw.WriteChunks(chks...); err != nil { - return nil, err + return errors.Wrap(err, "write chunks") } if err := indexw.AddSeries(i, lset, chks...); err != nil { - return nil, errors.Wrapf(err, "add series") + return errors.Wrap(err, "add series") } meta.Stats.NumChunks += uint64(len(chks)) @@ -490,7 +534,7 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu } for _, chk := range chks { - c.opts.chunkPool.Put(chk.Chunk) + c.chunkPool.Put(chk.Chunk) } for _, l := range lset { @@ -500,15 +544,13 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu values[l.Name] = valset } valset.set(l.Value) - - t := term{name: l.Name, value: l.Value} - - postings.add(i, t) } + postings.add(i, lset) + i++ } if set.Err() != nil { - return nil, set.Err() + return errors.Wrap(set.Err(), "iterate compaction set") } s := make([]string, 0, 256) @@ -519,30 +561,30 @@ func (c *LeveledCompactor) populateBlock(blocks []Block, indexw IndexWriter, chu s = append(s, x) } if err := indexw.WriteLabelIndex([]string{n}, s); err != nil { - return nil, err + return errors.Wrap(err, "write label index") } } - for t := range postings.m { - if err := indexw.WritePostings(t.name, t.value, postings.get(t)); err != nil { - return nil, err + for l := range postings.m { + if err := indexw.WritePostings(l.Name, l.Value, postings.get(l.Name, l.Value)); err != nil { + return errors.Wrap(err, "write postings") } } // Write a postings list containing all series. 
- all := make([]uint32, i) + all := make([]uint64, i) for i := range all { - all[i] = uint32(i) + all[i] = uint64(i) } if err := indexw.WritePostings("", "", newListPostings(all)); err != nil { - return nil, err + return errors.Wrap(err, "write 'all' postings") } - return &meta, nil + return nil } type compactionSet interface { Next() bool - At() (labels.Labels, []ChunkMeta, intervals) + At() (labels.Labels, []ChunkMeta, Intervals) Err() error } @@ -555,7 +597,7 @@ type compactionSeriesSet struct { l labels.Labels c []ChunkMeta - intervals intervals + intervals Intervals err error } @@ -572,9 +614,12 @@ func (c *compactionSeriesSet) Next() bool { if !c.p.Next() { return false } + var err error + c.intervals = c.tombstones.Get(c.p.At()) - if c.err = c.index.Series(c.p.At(), &c.l, &c.c); c.err != nil { + if err = c.index.Series(c.p.At(), &c.l, &c.c); err != nil { + c.err = errors.Wrapf(err, "get series %d", c.p.At()) return false } @@ -582,7 +627,7 @@ func (c *compactionSeriesSet) Next() bool { if len(c.intervals) > 0 { chks := make([]ChunkMeta, 0, len(c.c)) for _, chk := range c.c { - if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { + if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { chks = append(chks, chk) } } @@ -593,8 +638,9 @@ func (c *compactionSeriesSet) Next() bool { for i := range c.c { chk := &c.c[i] - chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) - if c.err != nil { + chk.Chunk, err = c.chunks.Chunk(chk.Ref) + if err != nil { + c.err = errors.Wrapf(err, "chunk %d not found", chk.Ref) return false } } @@ -609,7 +655,7 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []ChunkMeta, intervals) { +func (c *compactionSeriesSet) At() (labels.Labels, []ChunkMeta, Intervals) { return c.l, c.c, c.intervals } @@ -619,7 +665,7 @@ type compactionMerger struct { aok, bok bool l labels.Labels c []ChunkMeta - intervals intervals + intervals Intervals } type compactionSeries struct { @@ -700,7 +746,7 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []ChunkMeta, intervals) { +func (c *compactionMerger) At() (labels.Labels, []ChunkMeta, Intervals) { return c.l, c.c, c.intervals } diff --git a/vendor/github.com/prometheus/tsdb/db.go b/vendor/github.com/prometheus/tsdb/db.go index 8d581cdfa5..ad034d8b02 100644 --- a/vendor/github.com/prometheus/tsdb/db.go +++ b/vendor/github.com/prometheus/tsdb/db.go @@ -21,10 +21,8 @@ import ( "io/ioutil" "os" "path/filepath" - "runtime" "sort" "strconv" - "strings" "sync" "time" "unsafe" @@ -77,11 +75,11 @@ type Appender interface { // to AddFast() at any point. Adding the sample via Add() returns a new // reference number. // If the reference is the empty string it must not be used for caching. - Add(l labels.Labels, t int64, v float64) (string, error) + Add(l labels.Labels, t int64, v float64) (uint64, error) // Add adds a sample pair for the referenced series. It is generally faster // than adding a sample by providing its full label set. - AddFast(ref string, t int64, v float64) error + AddFast(ref uint64, t int64, v float64) error // Commit submits the collected samples and purges the batch. Commit() error @@ -100,18 +98,14 @@ type DB struct { metrics *dbMetrics opts *Options chunkPool chunks.Pool + compactor Compactor + wal WAL // Mutex for that must be held when modifying the general block layout. 
mtx sync.RWMutex - blocks []Block + blocks []DiskBlock - // Mutex that must be held when modifying just the head blocks - // or the general layout. - // mtx must be held before acquiring. - headmtx sync.RWMutex - heads []headBlock - - compactor Compactor + head *Head compactc chan struct{} donec chan struct{} @@ -123,22 +117,15 @@ type DB struct { } type dbMetrics struct { - activeAppenders prometheus.Gauge loadedBlocks prometheus.GaugeFunc reloads prometheus.Counter reloadsFailed prometheus.Counter - reloadDuration prometheus.Summary - samplesAppended prometheus.Counter compactionsTriggered prometheus.Counter } func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics { m := &dbMetrics{} - m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "tsdb_active_appenders", - Help: "Number of currently active appender transactions", - }) m.loadedBlocks = prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Name: "tsdb_blocks_loaded", Help: "Number of currently loaded data blocks", @@ -155,14 +142,6 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics { Name: "tsdb_reloads_failures_total", Help: "Number of times the database failed to reload block data from disk.", }) - m.reloadDuration = prometheus.NewSummary(prometheus.SummaryOpts{ - Name: "tsdb_reload_duration_seconds", - Help: "Duration of block reloads.", - }) - m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "tsdb_samples_appended_total", - Help: "Total number of appended sampledb.", - }) m.compactionsTriggered = prometheus.NewCounter(prometheus.CounterOpts{ Name: "tsdb_compactions_triggered_total", Help: "Total number of triggered compactions for the partition.", @@ -170,12 +149,9 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics { if r != nil { r.MustRegister( - m.activeAppenders, m.loadedBlocks, m.reloads, m.reloadsFailed, - m.reloadDuration, - m.samplesAppended, m.compactionsTriggered, ) } @@ -187,12 +163,10 @@ func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options) (db if err := os.MkdirAll(dir, 0777); err != nil { return nil, err } - if l == nil { l = log.NewLogfmtLogger(os.Stdout) l = log.With(l, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) } - if opts == nil { opts = DefaultOptions } @@ -224,29 +198,26 @@ func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options) (db db.lockf = &lockf } - copts := &LeveledCompactorOptions{ - blockRanges: opts.BlockRanges, - chunkPool: db.chunkPool, + db.compactor, err = NewLeveledCompactor(r, l, opts.BlockRanges, db.chunkPool) + if err != nil { + return nil, errors.Wrap(err, "create leveled compactor") } - if len(copts.blockRanges) == 0 { - return nil, errors.New("at least one block-range must exist") - } - - for float64(copts.blockRanges[len(copts.blockRanges)-1])/float64(opts.RetentionDuration) > 0.2 { - if len(copts.blockRanges) == 1 { - break - } - - // Max overflow is restricted to 20%.
- copts.blockRanges = copts.blockRanges[:len(copts.blockRanges)-1] - } - - db.compactor = NewLeveledCompactor(r, l, copts) - - if err := db.reloadBlocks(); err != nil { + wal, err := OpenSegmentWAL(filepath.Join(dir, "wal"), l, 10*time.Second) + if err != nil { return nil, err } + db.head, err = NewHead(r, l, wal, opts.BlockRanges[0]) + if err != nil { + return nil, err + } + if err := db.reload(); err != nil { + return nil, err + } + if err := db.head.ReadWAL(); err != nil { + return nil, errors.Wrap(err, "read WAL") + } + go db.run() return db, nil @@ -260,12 +231,17 @@ func (db *DB) Dir() string { func (db *DB) run() { defer close(db.donec) - tick := time.NewTicker(30 * time.Second) - defer tick.Stop() + backoff := time.Duration(0) for { select { - case <-tick.C: + case <-db.stopc: + return + case <-time.After(backoff): + } + + select { + case <-time.After(1 * time.Minute): select { case db.compactc <- struct{}{}: default: @@ -273,20 +249,20 @@ func (db *DB) run() { case <-db.compactc: db.metrics.compactionsTriggered.Inc() - changes1, err := db.retentionCutoff() - if err != nil { - db.logger.Log("msg", "retention cutoff failed", "err", err) + _, err1 := db.retentionCutoff() + if err1 != nil { + db.logger.Log("msg", "retention cutoff failed", "err", err1) } - changes2, err := db.compact() - if err != nil { - db.logger.Log("msg", "compaction failed", "err", err) + _, err2 := db.compact() + if err2 != nil { + db.logger.Log("msg", "compaction failed", "err", err2) } - if changes1 || changes2 { - if err := db.reloadBlocks(); err != nil { - db.logger.Log("msg", "reloading blocks failed", "err", err) - } + if err1 != nil || err2 != nil { + backoff = exponential(backoff, 1*time.Second, 1*time.Minute) + } else { + backoff = 0 } case <-db.stopc: @@ -303,74 +279,40 @@ func (db *DB) retentionCutoff() (bool, error) { db.mtx.RLock() defer db.mtx.RUnlock() - // We only consider the already persisted blocks. Head blocks generally - // only account for a fraction of the total data. - db.headmtx.RLock() - lenp := len(db.blocks) - len(db.heads) - db.headmtx.RUnlock() - - if lenp == 0 { + if len(db.blocks) == 0 { return false, nil } - last := db.blocks[lenp-1] + last := db.blocks[len(db.blocks)-1] mint := last.Meta().MaxTime - int64(db.opts.RetentionDuration) return retentionCutoff(db.dir, mint) } -// headFullness returns up to which fraction of a blocks time range samples -// were already inserted. -func headFullness(h headBlock) float64 { - m := h.Meta() - a := float64(h.HighTimestamp() - m.MinTime) - b := float64(m.MaxTime - m.MinTime) - return a / b +// Appender opens a new appender against the database. +func (db *DB) Appender() Appender { + return dbAppender{db: db, Appender: db.head.Appender()} } -// appendableHeads returns a copy of a slice of HeadBlocks that can still be appended to. -func (db *DB) appendableHeads() (r []headBlock) { - switch l := len(db.heads); l { - case 0: - case 1: - r = append(r, db.heads[0]) - default: - if headFullness(db.heads[l-1]) < 0.5 { - r = append(r, db.heads[l-2]) - } - r = append(r, db.heads[l-1]) - } - return r +// dbAppender wraps the DB's head appender and triggers compactions on commit +// if necessary. 
+type dbAppender struct { + Appender + db *DB } -func (db *DB) completedHeads() (r []headBlock) { - db.mtx.RLock() - defer db.mtx.RUnlock() +func (a dbAppender) Commit() error { + err := a.Appender.Commit() - db.headmtx.RLock() - defer db.headmtx.RUnlock() - - if len(db.heads) < 2 { - return nil - } - - // Select all old heads unless they still have pending appenders. - for _, h := range db.heads[:len(db.heads)-2] { - if h.ActiveWriters() > 0 { - return r + // We could just run this check every few minutes practically. But for benchmarks + // and high frequency use cases this is the safer way. + if a.db.head.MaxTime()-a.db.head.MinTime() > a.db.head.chunkRange/2*3 { + select { + case a.db.compactc <- struct{}{}: + default: } - r = append(r, h) } - // Add the 2nd last head if the last head is more than 50% filled. - // Compacting it early allows us to free its memory before allocating - // more for the next block and thus reduces spikes. - h0 := db.heads[len(db.heads)-1] - h1 := db.heads[len(db.heads)-2] - - if headFullness(h0) >= 0.5 && h1.ActiveWriters() == 0 { - r = append(r, h1) - } - return r + return err } func (db *DB) compact() (changes bool, err error) { @@ -383,22 +325,33 @@ func (db *DB) compact() (changes bool, err error) { // Check whether we have pending head blocks that are ready to be persisted. // They have the highest priority. - for _, h := range db.completedHeads() { + for { select { case <-db.stopc: return changes, nil default: } + // The head has a compactable range if 1.5 level 0 ranges are between the oldest + // and newest timestamp. The 0.5 acts as a buffer of the appendable window. + if db.head.MaxTime()-db.head.MinTime() <= db.opts.BlockRanges[0]/2*3 { + break + } + mint, maxt := rangeForTimestamp(db.head.MinTime(), db.opts.BlockRanges[0]) - if err = db.compactor.Write(db.dir, h); err != nil { + // Wrap head into a range that bounds all reads to it. + head := &rangeHead{ + head: db.head, + mint: mint, + maxt: maxt, + } + if err = db.compactor.Write(db.dir, head, mint, maxt); err != nil { return changes, errors.Wrap(err, "persist head block") } changes = true - if err := os.RemoveAll(h.Dir()); err != nil { - return changes, errors.Wrap(err, "delete compacted head block") + if err := db.reload(); err != nil { + return changes, errors.Wrap(err, "reload blocks") } - runtime.GC() } // Check for compactions of multiple blocks. @@ -427,7 +380,10 @@ func (db *DB) compact() (changes bool, err error) { return changes, errors.Wrap(err, "delete compacted block") } } - runtime.GC() + + if err := db.reload(); err != nil { + return changes, errors.Wrap(err, "reload blocks") + } } return changes, nil @@ -469,7 +425,7 @@ func retentionCutoff(dir string, mint int64) (bool, error) { return changes, fileutil.Fsync(df) } -func (db *DB) getBlock(id ulid.ULID) (Block, bool) { +func (db *DB) getBlock(id ulid.ULID) (DiskBlock, bool) { for _, b := range db.blocks { if b.Meta().ULID == id { return b, true @@ -478,30 +434,23 @@ func (db *DB) getBlock(id ulid.ULID) (Block, bool) { return nil, false } -func (db *DB) reloadBlocks() (err error) { - defer func(t time.Time) { +func (db *DB) reload() (err error) { + defer func() { if err != nil { db.metrics.reloadsFailed.Inc() } db.metrics.reloads.Inc() - db.metrics.reloadDuration.Observe(time.Since(t).Seconds()) - }(time.Now()) + }() var cs []io.Closer defer func() { closeAll(cs...) 
}() - db.mtx.Lock() - defer db.mtx.Unlock() - - db.headmtx.Lock() - defer db.headmtx.Unlock() - dirs, err := blockDirs(db.dir) if err != nil { return errors.Wrap(err, "find blocks") } var ( - blocks []Block + blocks []DiskBlock exist = map[ulid.ULID]struct{}{} ) @@ -513,11 +462,7 @@ func (db *DB) reloadBlocks() (err error) { b, ok := db.getBlock(meta.ULID) if !ok { - if meta.Compaction.Level == 0 { - b, err = db.openHeadBlock(dir) - } else { - b, err = newPersistedBlock(dir, db.chunkPool) - } + b, err = newPersistedBlock(dir, db.chunkPool) if err != nil { return errors.Wrapf(err, "open block %s", dir) } @@ -532,25 +477,29 @@ func (db *DB) reloadBlocks() (err error) { } // Close all opened blocks that no longer exist after we returned all locks. + // TODO(fabxc: probably races with querier still reading from them. Can + // we just abandon them and have the open FDs be GC'd automatically eventually? for _, b := range db.blocks { if _, ok := exist[b.Meta().ULID]; !ok { cs = append(cs, b) } } + db.mtx.Lock() db.blocks = blocks - db.heads = nil + db.mtx.Unlock() - for _, b := range blocks { - if b.Meta().Compaction.Level == 0 { - db.heads = append(db.heads, b.(*HeadBlock)) - } + // Garbage collect data in the head if the most recent persisted block + // covers data of its current time range. + if len(blocks) == 0 { + return nil } + maxt := blocks[len(db.blocks)-1].Meta().MaxTime - return nil + return errors.Wrap(db.head.Truncate(maxt), "head truncate failed") } -func validateBlockSequence(bs []Block) error { +func validateBlockSequence(bs []DiskBlock) error { if len(bs) == 0 { return nil } @@ -584,10 +533,10 @@ func (db *DB) Close() error { var merr MultiError merr.Add(g.Wait()) + if db.lockf != nil { merr.Add(db.lockf.Unlock()) } - return merr.Err() } @@ -614,125 +563,48 @@ func (db *DB) Snapshot(dir string) error { if dir == db.dir { return errors.Errorf("cannot snapshot into base directory") } + if _, err := ulid.Parse(dir); err == nil { + return errors.Errorf("dir must not be a valid ULID") + } + db.cmtx.Lock() defer db.cmtx.Unlock() - db.mtx.Lock() // To block any appenders. - defer db.mtx.Unlock() + db.mtx.RLock() + defer db.mtx.RUnlock() - blocks := db.blocks[:] - for _, b := range blocks { + for _, b := range db.blocks { db.logger.Log("msg", "snapshotting block", "block", b) + if err := b.Snapshot(dir); err != nil { return errors.Wrap(err, "error snapshotting headblock") } } - return nil + return db.compactor.Write(dir, db.head, db.head.MinTime(), db.head.MaxTime()) } -// Appender returns a new Appender on the database. -func (db *DB) Appender() Appender { - db.metrics.activeAppenders.Inc() - +// Querier returns a new querier over the data partition for the given time range. +// A goroutine must not handle more than one open Querier. 
+func (db *DB) Querier(mint, maxt int64) Querier { db.mtx.RLock() - return &dbAppender{db: db} -} -type dbAppender struct { - db *DB - heads []*metaAppender + blocks := db.blocksForInterval(mint, maxt) - samples int -} - -type metaAppender struct { - meta BlockMeta - app Appender -} - -func (a *dbAppender) Add(lset labels.Labels, t int64, v float64) (string, error) { - h, err := a.appenderAt(t) - if err != nil { - return "", err + sq := &querier{ + blocks: make([]Querier, 0, len(blocks)), + db: db, } - ref, err := h.app.Add(lset, t, v) - if err != nil { - return "", err + for _, b := range blocks { + sq.blocks = append(sq.blocks, &blockQuerier{ + mint: mint, + maxt: maxt, + index: b.Index(), + chunks: b.Chunks(), + tombstones: b.Tombstones(), + }) } - a.samples++ - - if ref == "" { - return "", nil - } - return string(append(h.meta.ULID[:], ref...)), nil -} - -func (a *dbAppender) AddFast(ref string, t int64, v float64) error { - if len(ref) < 16 { - return errors.Wrap(ErrNotFound, "invalid ref length") - } - // The first 16 bytes a ref hold the ULID of the head block. - h, err := a.appenderAt(t) - if err != nil { - return err - } - // Validate the ref points to the same block we got for t. - if string(h.meta.ULID[:]) != ref[:16] { - return ErrNotFound - } - if err := h.app.AddFast(ref[16:], t, v); err != nil { - // The block the ref points to might fit the given timestamp. - // We mask the error to stick with our contract. - if errors.Cause(err) == ErrOutOfBounds { - err = ErrNotFound - } - return err - } - - a.samples++ - return nil -} - -// appenderFor gets the appender for the head containing timestamp t. -// If the head block doesn't exist yet, it gets created. -func (a *dbAppender) appenderAt(t int64) (*metaAppender, error) { - for _, h := range a.heads { - if intervalContains(h.meta.MinTime, h.meta.MaxTime-1, t) { - return h, nil - } - } - // Currently opened appenders do not cover t. Ensure the head block is - // created and add missing appenders. - a.db.headmtx.Lock() - - if err := a.db.ensureHead(t); err != nil { - a.db.headmtx.Unlock() - return nil, err - } - - var hb headBlock - for _, h := range a.db.appendableHeads() { - m := h.Meta() - - if intervalContains(m.MinTime, m.MaxTime-1, t) { - hb = h - break - } - } - a.db.headmtx.Unlock() - - if hb == nil { - return nil, ErrOutOfBounds - } - // Instantiate appender after returning headmtx! - app := &metaAppender{ - meta: hb.Meta(), - app: hb.Appender(), - } - a.heads = append(a.heads, app) - - return app, nil + return sq } func rangeForTimestamp(t int64, width int64) (mint, maxt int64) { @@ -740,87 +612,7 @@ func rangeForTimestamp(t int64, width int64) (mint, maxt int64) { return mint, mint + width } -// ensureHead makes sure that there is a head block for the timestamp t if -// it is within or after the currently appendable window. -func (db *DB) ensureHead(t int64) error { - var ( - mint, maxt = rangeForTimestamp(t, int64(db.opts.BlockRanges[0])) - addBuffer = len(db.blocks) == 0 - last BlockMeta - ) - - if !addBuffer { - last = db.blocks[len(db.blocks)-1].Meta() - addBuffer = last.MaxTime <= mint-int64(db.opts.BlockRanges[0]) - } - // Create another block of buffer in front if the DB is initialized or retrieving - // new data after a long gap. - // This ensures we always have a full block width of append window. - if addBuffer { - if _, err := db.createHeadBlock(mint-int64(db.opts.BlockRanges[0]), mint); err != nil { - return err - } - // If the previous block reaches into our new window, make it smaller. 
- } else if mt := last.MaxTime; mt > mint { - mint = mt - } - if mint >= maxt { - return nil - } - // Error if the requested time for a head is before the appendable window. - if len(db.heads) > 0 && t < db.heads[0].Meta().MinTime { - return ErrOutOfBounds - } - - _, err := db.createHeadBlock(mint, maxt) - return err -} - -func (a *dbAppender) Commit() error { - defer a.db.metrics.activeAppenders.Dec() - defer a.db.mtx.RUnlock() - - // Commits to partial appenders must be concurrent as concurrent appenders - // may have conflicting locks on head appenders. - // For high-throughput use cases the errgroup causes significant blocking. Typically, - // we just deal with a single appender and special case it. - var err error - - switch len(a.heads) { - case 1: - err = a.heads[0].app.Commit() - default: - var g errgroup.Group - for _, h := range a.heads { - g.Go(h.app.Commit) - } - err = g.Wait() - } - - if err != nil { - return err - } - // XXX(fabxc): Push the metric down into head block to account properly - // for partial appends? - a.db.metrics.samplesAppended.Add(float64(a.samples)) - - return nil -} - -func (a *dbAppender) Rollback() error { - defer a.db.metrics.activeAppenders.Dec() - defer a.db.mtx.RUnlock() - - var g errgroup.Group - - for _, h := range a.heads { - g.Go(h.app.Rollback) - } - - return g.Wait() -} - -// Delete implements deletion of metrics. +// Delete implements deletion of metrics. It only has atomicity guarantees on a per-block basis. func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { db.cmtx.Lock() defer db.cmtx.Unlock() @@ -828,16 +620,21 @@ func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { db.mtx.Lock() defer db.mtx.Unlock() - blocks := db.blocksForInterval(mint, maxt) - var g errgroup.Group - for _, b := range blocks { - g.Go(func(b Block) func() error { - return func() error { return b.Delete(mint, maxt, ms...) } - }(b)) + for _, b := range db.blocks { + m := b.Meta() + if intervalOverlap(mint, maxt, m.MinTime, m.MaxTime) { + g.Go(func(b DiskBlock) func() error { + return func() error { return b.Delete(mint, maxt, ms...) } + }(b)) + } } + g.Go(func() error { + return db.head.Delete(mint, maxt, ms...) + }) + if err := g.Wait(); err != nil { return err } @@ -856,8 +653,8 @@ func intervalContains(min, max, t int64) bool { // blocksForInterval returns all blocks within the partition that may contain // data for the given time range. -func (db *DB) blocksForInterval(mint, maxt int64) []Block { - var bs []Block +func (db *DB) blocksForInterval(mint, maxt int64) []BlockReader { + var bs []BlockReader for _, b := range db.blocks { m := b.Meta() @@ -865,52 +662,13 @@ func (db *DB) blocksForInterval(mint, maxt int64) []Block { bs = append(bs, b) } } + if maxt >= db.head.MinTime() { + bs = append(bs, db.head) + } return bs } -// openHeadBlock opens the head block at dir. -func (db *DB) openHeadBlock(dir string) (*HeadBlock, error) { - var ( - wdir = walDir(dir) - l = log.With(db.logger, "wal", wdir) - ) - wal, err := OpenSegmentWAL(wdir, l, 5*time.Second) - if err != nil { - return nil, errors.Wrap(err, "open WAL %s") - } - - h, err := OpenHeadBlock(dir, log.With(db.logger, "block", dir), wal, db.compactor) - if err != nil { - return nil, errors.Wrapf(err, "open head block %s", dir) - } - return h, nil -} - -// createHeadBlock starts a new head block to append to. 
-func (db *DB) createHeadBlock(mint, maxt int64) (headBlock, error) { - dir, err := TouchHeadBlock(db.dir, mint, maxt) - if err != nil { - return nil, errors.Wrapf(err, "touch head block %s", dir) - } - newHead, err := db.openHeadBlock(dir) - if err != nil { - return nil, err - } - - db.logger.Log("msg", "created head block", "ulid", newHead.meta.ULID, "mint", mint, "maxt", maxt) - - db.blocks = append(db.blocks, newHead) // TODO(fabxc): this is a race! - db.heads = append(db.heads, newHead) - - select { - case db.compactc <- struct{}{}: - default: - } - - return newHead, nil -} - func isBlockDir(fi os.FileInfo) bool { if !fi.IsDir() { return false @@ -934,7 +692,7 @@ func blockDirs(dir string) ([]string, error) { return dirs, nil } -func sequenceFiles(dir, prefix string) ([]string, error) { +func sequenceFiles(dir string) ([]string, error) { files, err := ioutil.ReadDir(dir) if err != nil { return nil, err @@ -942,24 +700,15 @@ func sequenceFiles(dir, prefix string) ([]string, error) { var res []string for _, fi := range files { - if isSequenceFile(fi, prefix) { - res = append(res, filepath.Join(dir, fi.Name())) + if _, err := strconv.ParseUint(fi.Name(), 10, 64); err != nil { + continue } + res = append(res, filepath.Join(dir, fi.Name())) } return res, nil } -func isSequenceFile(fi os.FileInfo, prefix string) bool { - if !strings.HasPrefix(fi.Name(), prefix) { - return false - } - if _, err := strconv.ParseUint(fi.Name()[len(prefix):], 10, 32); err != nil { - return false - } - return true -} - -func nextSequenceFile(dir, prefix string) (string, int, error) { +func nextSequenceFile(dir string) (string, int, error) { names, err := fileutil.ReadDir(dir) if err != nil { return "", 0, err @@ -967,16 +716,13 @@ func nextSequenceFile(dir, prefix string) (string, int, error) { i := uint64(0) for _, n := range names { - if !strings.HasPrefix(n, prefix) { - continue - } - j, err := strconv.ParseUint(n[len(prefix):], 10, 32) + j, err := strconv.ParseUint(n, 10, 64) if err != nil { continue } i = j } - return filepath.Join(dir, fmt.Sprintf("%s%0.6d", prefix, i+1)), int(i + 1), nil + return filepath.Join(dir, fmt.Sprintf("%0.6d", i+1)), int(i + 1), nil } // The MultiError type implements the error interface, and contains the @@ -1032,3 +778,14 @@ func closeAll(cs ...io.Closer) error { } return merr.Err() } + +func exponential(d, min, max time.Duration) time.Duration { + d *= 2 + if d < min { + d = min + } + if d > max { + d = max + } + return d +} diff --git a/vendor/github.com/prometheus/tsdb/encoding_helpers.go b/vendor/github.com/prometheus/tsdb/encoding_helpers.go index 25ff32d00b..9aa4ba4097 100644 --- a/vendor/github.com/prometheus/tsdb/encoding_helpers.go +++ b/vendor/github.com/prometheus/tsdb/encoding_helpers.go @@ -86,7 +86,7 @@ func (d *decbuf) uvarintStr() string { d.e = errInvalidSize return "" } - s := yoloString(d.b[:l]) + s := string(d.b[:l]) d.b = d.b[l:] return s } diff --git a/vendor/github.com/prometheus/tsdb/head.go b/vendor/github.com/prometheus/tsdb/head.go index 045378d9cb..a74552bcaf 100644 --- a/vendor/github.com/prometheus/tsdb/head.go +++ b/vendor/github.com/prometheus/tsdb/head.go @@ -14,21 +14,16 @@ package tsdb import ( - "fmt" "math" - "math/rand" - "os" - "path/filepath" + "runtime" "sort" "sync" "sync/atomic" "time" - "encoding/binary" - "github.com/go-kit/kit/log" - "github.com/oklog/ulid" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/labels" ) @@ -50,107 +45,171 @@ var ( 
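The new exponential helper above doubles the given duration and clamps it to [min, max]; starting from zero therefore yields min on the first call. A small illustrative sketch with hypothetical values:

	// Backoff for retries: 100ms, 200ms, 400ms, 800ms, 1.6s, then capped at 3s.
	d := time.Duration(0)
	for i := 0; i < 6; i++ {
		d = exponential(d, 100*time.Millisecond, 3*time.Second)
		fmt.Println(d)
	}
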
ErrOutOfBounds = errors.New("out of bounds") ) -// HeadBlock handles reads and writes of time series data within a time window. -type HeadBlock struct { - mtx sync.RWMutex - dir string - wal WAL - compactor Compactor +// Head handles reads and writes of time series data within a time window. +type Head struct { + chunkRange int64 + metrics *headMetrics + wal WAL + logger log.Logger + appendPool sync.Pool - activeWriters uint64 - highTimestamp int64 - closed bool + minTime, maxTime int64 + lastSeriesID uint64 - // descs holds all chunk descs for the head block. Each chunk implicitly - // is assigned the index as its ID. - series []*memSeries - // hashes contains a collision map of label set hashes of chunks - // to their chunk descs. - hashes map[uint64][]*memSeries + // All series addressable by their ID or hash. + series *stripeSeries - symbols map[string]struct{} - values map[string]stringset // label names to possible values - postings *memPostings // postings lists for terms + symMtx sync.RWMutex + symbols map[string]struct{} + values map[string]stringset // label names to possible values + + postings *memPostings // postings lists for terms tombstones tombstoneReader - - meta BlockMeta } -// TouchHeadBlock atomically touches a new head block in dir for -// samples in the range [mint,maxt). -func TouchHeadBlock(dir string, mint, maxt int64) (string, error) { - entropy := rand.New(rand.NewSource(time.Now().UnixNano())) - - ulid, err := ulid.New(ulid.Now(), entropy) - if err != nil { - return "", err - } - - // Make head block creation appear atomic. - dir = filepath.Join(dir, ulid.String()) - tmp := dir + ".tmp" - - if err := os.MkdirAll(tmp, 0777); err != nil { - return "", err - } - - if err := writeMetaFile(tmp, &BlockMeta{ - ULID: ulid, - MinTime: mint, - MaxTime: maxt, - }); err != nil { - return "", err - } - - return dir, renameFile(tmp, dir) +type headMetrics struct { + activeAppenders prometheus.Gauge + series prometheus.Gauge + seriesCreated prometheus.Counter + seriesRemoved prometheus.Counter + chunks prometheus.Gauge + chunksCreated prometheus.Gauge + chunksRemoved prometheus.Gauge + gcDuration prometheus.Summary + minTime prometheus.GaugeFunc + maxTime prometheus.GaugeFunc + samplesAppended prometheus.Counter + walTruncateDuration prometheus.Summary } -// OpenHeadBlock opens the head block in dir. 
-func OpenHeadBlock(dir string, l log.Logger, wal WAL, c Compactor) (*HeadBlock, error) {
-	meta, err := readMetaFile(dir)
-	if err != nil {
-		return nil, err
-	}
+func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
+	m := &headMetrics{}
 
-	h := &HeadBlock{
-		dir:        dir,
+	m.activeAppenders = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_active_appenders",
+		Help: "Number of currently active appender transactions",
+	})
+	m.series = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_series",
+		Help: "Total number of series in the head block.",
+	})
+	m.seriesCreated = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_series_created_total",
+		Help: "Total number of series created in the head",
+	})
+	m.seriesRemoved = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_series_removed_total",
+		Help: "Total number of series removed in the head",
+	})
+	m.chunks = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_chunks",
+		Help: "Total number of chunks in the head block.",
+	})
+	m.chunksCreated = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_chunks_created_total",
+		Help: "Total number of chunks created in the head",
+	})
+	m.chunksRemoved = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "tsdb_head_chunks_removed_total",
+		Help: "Total number of chunks removed in the head",
+	})
+	m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
+		Name: "tsdb_head_gc_duration_seconds",
+		Help: "Runtime of garbage collection in the head block.",
+	})
+	m.minTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Name: "tsdb_head_min_time",
+		Help: "Minimum time bound of the head block.",
+	}, func() float64 {
+		return float64(h.MinTime())
+	})
+	m.maxTime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+		Name: "tsdb_head_max_time",
+		Help: "Maximum timestamp of the head block.",
+	}, func() float64 {
+		return float64(h.MaxTime())
+	})
+	m.walTruncateDuration = prometheus.NewSummary(prometheus.SummaryOpts{
+		Name: "tsdb_wal_truncate_duration_seconds",
+		Help: "Duration of WAL truncation.",
+	})
+	m.samplesAppended = prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "tsdb_head_samples_appended_total",
+		Help: "Total number of appended samples.",
+	})
+
+	if r != nil {
+		r.MustRegister(
+			m.activeAppenders,
+			m.chunks,
+			m.chunksCreated,
+			m.chunksRemoved,
+			m.series,
+			m.seriesCreated,
+			m.seriesRemoved,
+			m.minTime,
+			m.maxTime,
+			m.gcDuration,
+			m.walTruncateDuration,
+			m.samplesAppended,
+		)
+	}
+	return m
+}
+
+// NewHead opens the head block.
+func NewHead(r prometheus.Registerer, l log.Logger, wal WAL, chunkRange int64) (*Head, error) {
+	if l == nil {
+		l = log.NewNopLogger()
+	}
+	if wal == nil {
+		wal = NopWAL()
+	}
+	if chunkRange < 1 {
+		return nil, errors.Errorf("invalid chunk range %d", chunkRange)
+	}
+	h := &Head{
 		wal:        wal,
-		compactor:  c,
-		series:     []*memSeries{nil}, // 0 is not a valid posting, filled with nil.
- hashes: map[uint64][]*memSeries{}, + logger: l, + chunkRange: chunkRange, + minTime: math.MinInt64, + maxTime: math.MinInt64, + series: newStripeSeries(), values: map[string]stringset{}, symbols: map[string]struct{}{}, - postings: &memPostings{m: make(map[term][]uint32)}, - meta: *meta, + postings: newMemPostings(), tombstones: newEmptyTombstoneReader(), } - return h, h.init() + h.metrics = newHeadMetrics(h, r) + + return h, nil } -func (h *HeadBlock) init() error { +func (h *Head) ReadWAL() error { r := h.wal.Reader() + mint := h.MinTime() - seriesFunc := func(series []labels.Labels) error { - for _, lset := range series { - h.create(lset.Hash(), lset) - h.meta.Stats.NumSeries++ + seriesFunc := func(series []RefSeries) error { + for _, s := range series { + h.create(s.Labels.Hash(), s.Labels) } - return nil } samplesFunc := func(samples []RefSample) error { for _, s := range samples { - if int(s.Ref) >= len(h.series) { - return errors.Errorf("unknown series reference %d (max %d); abort WAL restore", - s.Ref, len(h.series)) + if s.T < mint { + continue } - h.series[s.Ref].append(s.T, s.V) - - if !h.inBounds(s.T) { - return errors.Wrap(ErrOutOfBounds, "consume WAL") + ms := h.series.getByID(s.Ref) + if ms == nil { + return errors.Errorf("unknown series reference %d; abort WAL restore", s.Ref) + } + _, chunkCreated := ms.append(s.T, s.V) + if chunkCreated { + h.metrics.chunksCreated.Inc() + h.metrics.chunks.Inc() } - h.meta.Stats.NumSamples++ } return nil @@ -158,6 +217,9 @@ func (h *HeadBlock) init() error { deletesFunc := func(stones []Stone) error { for _, s := range stones { for _, itv := range s.intervals { + if itv.Maxt < mint { + continue + } h.tombstones.add(s.ref, itv) } } @@ -168,377 +230,229 @@ func (h *HeadBlock) init() error { if err := r.Read(seriesFunc, samplesFunc, deletesFunc); err != nil { return errors.Wrap(err, "consume WAL") } - return nil } -// inBounds returns true if the given timestamp is within the valid -// time bounds of the block. -func (h *HeadBlock) inBounds(t int64) bool { - return t >= h.meta.MinTime && t <= h.meta.MaxTime -} +// Truncate removes all data before mint from the head block and truncates its WAL. +func (h *Head) Truncate(mint int64) error { + initialize := h.MinTime() == math.MinInt64 -func (h *HeadBlock) String() string { - return h.meta.ULID.String() -} - -// Close syncs all data and closes underlying resources of the head block. -func (h *HeadBlock) Close() error { - h.mtx.Lock() - defer h.mtx.Unlock() - - if err := h.wal.Close(); err != nil { - return errors.Wrapf(err, "close WAL for head %s", h.dir) + if mint%h.chunkRange != 0 { + return errors.Errorf("truncating at %d not aligned", mint) } - // Check whether the head block still exists in the underlying dir - // or has already been replaced with a compacted version or removed. - meta, err := readMetaFile(h.dir) - if os.IsNotExist(err) { + if h.MinTime() >= mint { return nil } + atomic.StoreInt64(&h.minTime, mint) + + // Ensure that max time is at least as high as min time. + for h.MaxTime() < mint { + atomic.CompareAndSwapInt64(&h.maxTime, h.MaxTime(), mint) + } + + // This was an initial call to Truncate after loading blocks on startup. + // We haven't read back the WAL yet, so do not attempt to truncate it. 
+	if initialize {
+		return nil
+	}
+
+	start := time.Now()
+
+	h.gc()
+	h.logger.Log("msg", "head GC completed", "duration", time.Since(start))
+	h.metrics.gcDuration.Observe(time.Since(start).Seconds())
+
+	start = time.Now()
+
+	p, err := h.indexRange(mint, math.MaxInt64).Postings("", "")
 	if err != nil {
 		return err
 	}
-	if meta.ULID == h.meta.ULID {
-		return writeMetaFile(h.dir, &h.meta)
-	}
 
-	h.closed = true
+	if err := h.wal.Truncate(mint, p); err == nil {
+		h.logger.Log("msg", "WAL truncation completed", "duration", time.Since(start))
+	} else {
+		h.logger.Log("msg", "WAL truncation failed", "err", err, "duration", time.Since(start))
+	}
+	h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds())
+
 	return nil
 }
 
-// Meta returns a BlockMeta for the head block.
-func (h *HeadBlock) Meta() BlockMeta {
-	m := BlockMeta{
-		ULID:       h.meta.ULID,
-		MinTime:    h.meta.MinTime,
-		MaxTime:    h.meta.MaxTime,
-		Compaction: h.meta.Compaction,
+// initTime initializes a head with the first timestamp. This only needs to be called
+// for a completely fresh head with an empty WAL.
+// Returns true if the initialization took effect.
+func (h *Head) initTime(t int64) (initialized bool) {
+	// In the init state, the head has a high timestamp of math.MinInt64.
+	mint, _ := rangeForTimestamp(t, h.chunkRange)
+
+	if !atomic.CompareAndSwapInt64(&h.minTime, math.MinInt64, mint) {
+		return false
 	}
+	// Ensure that max time is initialized to at least the min time we just set.
+	// Concurrent appenders may already have set it to a higher value.
+	atomic.CompareAndSwapInt64(&h.maxTime, math.MinInt64, t)
 
-	m.Stats.NumChunks = atomic.LoadUint64(&h.meta.Stats.NumChunks)
-	m.Stats.NumSeries = atomic.LoadUint64(&h.meta.Stats.NumSeries)
-	m.Stats.NumSamples = atomic.LoadUint64(&h.meta.Stats.NumSamples)
-
-	return m
+	return true
 }
 
-// Tombstones returns the TombstoneReader against the block.
-func (h *HeadBlock) Tombstones() TombstoneReader {
-	return h.tombstones
+// initAppender is a helper to initialize the time bounds of the head
+// upon the first sample it receives.
+type initAppender struct {
+	app  Appender
+	head *Head
 }
 
-// Delete implements headBlock.
-func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error {
-	ir := h.Index()
-
-	pr := newPostingsReader(ir)
-	p, absent := pr.Select(ms...)
-
-	var stones []Stone
-
-Outer:
-	for p.Next() {
-		ref := p.At()
-		lset := h.series[ref].lset
-		for _, abs := range absent {
-			if lset.Get(abs) != "" {
-				continue Outer
-			}
-		}
-
-		// Delete only until the current values and not beyond.
-		tmin, tmax := clampInterval(mint, maxt, h.series[ref].chunks[0].minTime, h.series[ref].head().maxTime)
-		stones = append(stones, Stone{ref, intervals{{tmin, tmax}}})
+func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) {
+	if a.app != nil {
+		return a.app.Add(lset, t, v)
 	}
+	a.head.initTime(t)
+	a.app = a.head.appender()
 
-	if p.Err() != nil {
-		return p.Err()
-	}
-	if err := h.wal.LogDeletes(stones); err != nil {
-		return err
-	}
-
-	for _, s := range stones {
-		h.tombstones.add(s.ref, s.intervals[0])
-	}
-
-	h.meta.Stats.NumTombstones = uint64(len(h.tombstones))
-	return nil
+	return a.app.Add(lset, t, v)
 }
 
-// Snapshot persists the current state of the headblock to the given directory.
-// Callers must ensure that there are no active appenders against the block.
-// DB does this by acquiring its own write lock.
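A usage sketch for the appender path above: on a fresh head, Appender returns an initAppender whose first Add initializes the head's time bounds before delegating. labels.FromStrings is assumed from the tsdb labels package; all values are illustrative.

	app := head.Appender() // initAppender on a fresh head

	// The first Add sets the head's min/max time from t.
	ref, err := app.Add(labels.FromStrings("job", "node"), 1000, 0.5)
	if err != nil {
		return err
	}
	// Later samples for the same series can take the fast path by reference.
	if err := app.AddFast(ref, 2000, 0.6); err != nil {
		return err
	}
	return app.Commit()
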
-func (h *HeadBlock) Snapshot(snapshotDir string) error { - if h.meta.Stats.NumSeries == 0 { +func (a *initAppender) AddFast(ref uint64, t int64, v float64) error { + if a.app == nil { + return ErrNotFound + } + return a.app.AddFast(ref, t, v) +} + +func (a *initAppender) Commit() error { + if a.app == nil { return nil } - - return h.compactor.Write(snapshotDir, h) + return a.app.Commit() } -// Dir returns the directory of the block. -func (h *HeadBlock) Dir() string { return h.dir } - -// Index returns an IndexReader against the block. -func (h *HeadBlock) Index() IndexReader { - h.mtx.RLock() - defer h.mtx.RUnlock() - - return &headIndexReader{HeadBlock: h, maxSeries: uint32(len(h.series) - 1)} -} - -// Chunks returns a ChunkReader against the block. -func (h *HeadBlock) Chunks() ChunkReader { return &headChunkReader{h} } - -// Querier returns a new Querier against the block for the range [mint, maxt]. -func (h *HeadBlock) Querier(mint, maxt int64) Querier { - h.mtx.RLock() - if h.closed { - panic(fmt.Sprintf("block %s already closed", h.dir)) +func (a *initAppender) Rollback() error { + if a.app == nil { + return nil } - h.mtx.RUnlock() + return a.app.Rollback() +} - return &blockQuerier{ - mint: mint, - maxt: maxt, - index: h.Index(), - chunks: h.Chunks(), - tombstones: h.Tombstones(), +// Appender returns a new Appender on the database. +func (h *Head) Appender() Appender { + h.metrics.activeAppenders.Inc() + + // The head cache might not have a starting point yet. The init appender + // picks up the first appended timestamp as the base. + if h.MinTime() == math.MinInt64 { + return &initAppender{head: h} + } + return h.appender() +} + +func (h *Head) appender() *headAppender { + return &headAppender{ + head: h, + mint: h.MaxTime() - h.chunkRange/2, + samples: h.getAppendBuffer(), + highTimestamp: math.MinInt64, } } -// Appender returns a new Appender against the head block. -func (h *HeadBlock) Appender() Appender { - atomic.AddUint64(&h.activeWriters, 1) - - h.mtx.RLock() - - if h.closed { - panic(fmt.Sprintf("block %s already closed", h.dir)) - } - return &headAppender{HeadBlock: h, samples: getHeadAppendBuffer()} -} - -// ActiveWriters returns true if the block has open write transactions. -func (h *HeadBlock) ActiveWriters() int { - return int(atomic.LoadUint64(&h.activeWriters)) -} - -// HighTimestamp returns the highest inserted sample timestamp. -func (h *HeadBlock) HighTimestamp() int64 { - return atomic.LoadInt64(&h.highTimestamp) -} - -var headPool = sync.Pool{} - -func getHeadAppendBuffer() []RefSample { - b := headPool.Get() +func (h *Head) getAppendBuffer() []RefSample { + b := h.appendPool.Get() if b == nil { return make([]RefSample, 0, 512) } return b.([]RefSample) } -func putHeadAppendBuffer(b []RefSample) { - headPool.Put(b[:0]) +func (h *Head) putAppendBuffer(b []RefSample) { + h.appendPool.Put(b[:0]) } type headAppender struct { - *HeadBlock - - newSeries []*hashedLabels - newLabels []labels.Labels - newHashes map[uint64]uint64 + head *Head + mint int64 + series []RefSeries samples []RefSample highTimestamp int64 } -type hashedLabels struct { - ref uint64 - hash uint64 - labels labels.Labels -} - -func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (string, error) { - if !a.inBounds(t) { - return "", ErrOutOfBounds +func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) { + if t < a.mint { + return 0, ErrOutOfBounds } - hash := lset.Hash() - refb := make([]byte, 8) - // Series exists already in the block. 
- if ms := a.get(hash, lset); ms != nil { - binary.BigEndian.PutUint64(refb, uint64(ms.ref)) - return string(refb), a.AddFast(string(refb), t, v) + s := a.head.series.getByHash(hash, lset) + + if s == nil { + s = a.head.create(hash, lset) + + a.series = append(a.series, RefSeries{ + Ref: s.ref, + Labels: lset, + hash: hash, + }) } - // Series was added in this transaction previously. - if ref, ok := a.newHashes[hash]; ok { - binary.BigEndian.PutUint64(refb, ref) - // XXX(fabxc): there's no fast path for multiple samples for the same new series - // in the same transaction. We always return the invalid empty ref. It's has not - // been a relevant use case so far and is not worth the trouble. - return "", a.AddFast(string(refb), t, v) - } - - // The series is completely new. - if a.newSeries == nil { - a.newHashes = map[uint64]uint64{} - } - // First sample for new series. - ref := uint64(len(a.newSeries)) - - a.newSeries = append(a.newSeries, &hashedLabels{ - ref: ref, - hash: hash, - labels: lset, - }) - // First bit indicates its a series created in this transaction. - ref |= (1 << 63) - - a.newHashes[hash] = ref - binary.BigEndian.PutUint64(refb, ref) - - return "", a.AddFast(string(refb), t, v) + return s.ref, a.AddFast(s.ref, t, v) } -func (a *headAppender) AddFast(ref string, t int64, v float64) error { - if len(ref) != 8 { - return errors.Wrap(ErrNotFound, "invalid ref length") +func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { + s := a.head.series.getByID(ref) + + if s == nil { + return errors.Wrap(ErrNotFound, "unknown series") } - var ( - refn = binary.BigEndian.Uint64(yoloBytes(ref)) - id = (refn << 1) >> 1 - inTx = refn&(1<<63) != 0 - ) - // Distinguish between existing series and series created in - // this transaction. - if inTx { - if id > uint64(len(a.newSeries)-1) { - return errors.Wrap(ErrNotFound, "transaction series ID too high") - } - // TODO(fabxc): we also have to validate here that the - // sample sequence is valid. - // We also have to revalidate it as we switch locks and create - // the new series. - } else if id > uint64(len(a.series)) { - return errors.Wrap(ErrNotFound, "transaction series ID too high") - } else { - ms := a.series[id] - if ms == nil { - return errors.Wrap(ErrNotFound, "nil series") - } - // TODO(fabxc): memory series should be locked here already. - // Only problem is release of locks in case of a rollback. - c := ms.head() - - if !a.inBounds(t) { - return ErrOutOfBounds - } - if t < c.maxTime { - return ErrOutOfOrderSample - } - - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if c.maxTime == t && math.Float64bits(ms.lastValue) != math.Float64bits(v) { - return ErrAmendSample - } + if err := s.appendable(t, v); err != nil { + return err } + if t < a.mint { + return ErrOutOfBounds + } if t > a.highTimestamp { a.highTimestamp = t } a.samples = append(a.samples, RefSample{ - Ref: refn, - T: t, - V: v, + Ref: ref, + T: t, + V: v, + series: s, }) return nil } -func (a *headAppender) createSeries() error { - if len(a.newSeries) == 0 { - return nil - } - a.newLabels = make([]labels.Labels, 0, len(a.newSeries)) - base0 := len(a.series) - - a.mtx.RUnlock() - defer a.mtx.RLock() - a.mtx.Lock() - defer a.mtx.Unlock() - - base1 := len(a.series) - - for _, l := range a.newSeries { - // We switched locks and have to re-validate that the series were not - // created by another goroutine in the meantime. 
-		if base1 > base0 {
-			if ms := a.get(l.hash, l.labels); ms != nil {
-				l.ref = uint64(ms.ref)
-				continue
-			}
-		}
-		// Series is still new.
-		a.newLabels = append(a.newLabels, l.labels)
-		l.ref = uint64(len(a.series))
-
-		a.create(l.hash, l.labels)
-	}
-
-	// Write all new series to the WAL.
-	if err := a.wal.LogSeries(a.newLabels); err != nil {
-		return errors.Wrap(err, "WAL log series")
-	}
-
-	return nil
-}
-
 func (a *headAppender) Commit() error {
-	defer atomic.AddUint64(&a.activeWriters, ^uint64(0))
-	defer putHeadAppendBuffer(a.samples)
-	defer a.mtx.RUnlock()
+	defer a.Rollback()
 
-	if err := a.createSeries(); err != nil {
+	if err := a.head.wal.LogSeries(a.series); err != nil {
 		return err
 	}
-
-	// We have to update the refs of samples for series we just created.
-	for i := range a.samples {
-		s := &a.samples[i]
-		if s.Ref&(1<<63) != 0 {
-			s.Ref = a.newSeries[(s.Ref<<1)>>1].ref
-		}
-	}
-
-	// Write all new samples to the WAL and add them to the
-	// in-mem database on success.
-	if err := a.wal.LogSamples(a.samples); err != nil {
+	if err := a.head.wal.LogSamples(a.samples); err != nil {
 		return errors.Wrap(err, "WAL log samples")
 	}
 
-	total := uint64(len(a.samples))
+	total := len(a.samples)
 
 	for _, s := range a.samples {
-		if !a.series[s.Ref].append(s.T, s.V) {
+		ok, chunkCreated := s.series.append(s.T, s.V)
+		if !ok {
 			total--
 		}
+		if chunkCreated {
+			a.head.metrics.chunks.Inc()
+			a.head.metrics.chunksCreated.Inc()
+		}
 	}
 
-	atomic.AddUint64(&a.meta.Stats.NumSamples, total)
-	atomic.AddUint64(&a.meta.Stats.NumSeries, uint64(len(a.newSeries)))
+	a.head.metrics.samplesAppended.Add(float64(total))
 
 	for {
-		ht := a.HeadBlock.HighTimestamp()
+		ht := a.head.MaxTime()
 		if a.highTimestamp <= ht {
 			break
 		}
-		if atomic.CompareAndSwapInt64(&a.HeadBlock.highTimestamp, ht, a.highTimestamp) {
+		if atomic.CompareAndSwapInt64(&a.head.maxTime, ht, a.highTimestamp) {
 			break
 		}
 	}
@@ -547,69 +461,297 @@ func (a *headAppender) Commit() error {
 }
 
 func (a *headAppender) Rollback() error {
-	a.mtx.RUnlock()
-	atomic.AddUint64(&a.activeWriters, ^uint64(0))
-	putHeadAppendBuffer(a.samples)
+	a.head.metrics.activeAppenders.Dec()
+	a.head.putAppendBuffer(a.samples)
+
 	return nil
 }
 
+// Delete all samples in the range of [mint, maxt] for series that satisfy the given
+// label matchers.
+func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error {
+	// Do not delete anything beyond the currently valid range.
+	mint, maxt = clampInterval(mint, maxt, h.MinTime(), h.MaxTime())
+
+	ir := h.indexRange(mint, maxt)
+
+	pr := newPostingsReader(ir)
+	p, absent := pr.Select(ms...)
+
+	var stones []Stone
+
+Outer:
+	for p.Next() {
+		series := h.series.getByID(p.At())
+
+		for _, abs := range absent {
+			if series.lset.Get(abs) != "" {
+				continue Outer
+			}
+		}
+
+		// Delete only until the current values and not beyond.
+		t0, t1 := clampInterval(mint, maxt, series.minTime(), series.maxTime())
+		stones = append(stones, Stone{p.At(), Intervals{{t0, t1}}})
+	}
+
+	if p.Err() != nil {
+		return p.Err()
+	}
+	if err := h.wal.LogDeletes(stones); err != nil {
+		return err
+	}
+	for _, s := range stones {
+		h.tombstones.add(s.ref, s.intervals[0])
+	}
+	return nil
+}
+
+// gc removes data before the minimum timestamp from the head.
+func (h *Head) gc() {
+	defer runtime.GC()
+
+	// Only data strictly lower than this timestamp must be deleted.
+	mint := h.MinTime()
+
+	// Drop old chunks and remember series IDs and hashes if they can be
+	// deleted entirely.
+	deleted, chunksRemoved := h.series.gc(mint)
+	seriesRemoved := len(deleted)
+
+	h.metrics.seriesRemoved.Add(float64(seriesRemoved))
+	h.metrics.series.Sub(float64(seriesRemoved))
+	h.metrics.chunksRemoved.Add(float64(chunksRemoved))
+	h.metrics.chunks.Sub(float64(chunksRemoved))
+
+	// Remove deleted series IDs from the postings lists. First do a collection
+	// run where we rebuild all postings that have something to delete.
+	h.postings.mtx.RLock()
+
+	type replEntry struct {
+		idx int
+		l   []uint64
+	}
+	collected := map[labels.Label]replEntry{}
+
+	for t, p := range h.postings.m {
+		repl := replEntry{idx: len(p)}
+
+		for i, id := range p {
+			if _, ok := deleted[id]; ok {
+				// First ID that got deleted, initialize replacement with
+				// all remaining IDs so far.
+				if repl.l == nil {
+					repl.l = make([]uint64, 0, len(p))
+					repl.l = append(repl.l, p[:i]...)
+				}
+				continue
+			}
+			// Only add to the replacement once we know we have to do it.
+			if repl.l != nil {
+				repl.l = append(repl.l, id)
+			}
+		}
+		if repl.l != nil {
+			collected[t] = repl
+		}
+	}
+
+	h.postings.mtx.RUnlock()
+
+	// Replace all postings that have changed. Append all IDs that may have
+	// been added while we switched locks.
+	h.postings.mtx.Lock()
+
+	for t, repl := range collected {
+		l := append(repl.l, h.postings.m[t][repl.idx:]...)
+
+		if len(l) > 0 {
+			h.postings.m[t] = l
+		} else {
+			delete(h.postings.m, t)
+		}
+	}
+
+	h.postings.mtx.Unlock()
+
+	// Rebuild symbols and label value indices from what is left in the postings terms.
+	h.postings.mtx.RLock()
+
+	symbols := make(map[string]struct{}, len(h.symbols))
+	values := make(map[string]stringset, len(h.values))
+
+	for t := range h.postings.m {
+		symbols[t.Name] = struct{}{}
+		symbols[t.Value] = struct{}{}
+
+		ss, ok := values[t.Name]
+		if !ok {
+			ss = stringset{}
+			values[t.Name] = ss
+		}
+		ss.set(t.Value)
+	}
+
+	h.postings.mtx.RUnlock()
+
+	h.symMtx.Lock()
+
+	h.symbols = symbols
+	h.values = values
+
+	h.symMtx.Unlock()
+}
+
+func (h *Head) Tombstones() TombstoneReader {
+	return h.tombstones
+}
+
+// Index returns an IndexReader against the block.
+func (h *Head) Index() IndexReader {
+	return h.indexRange(math.MinInt64, math.MaxInt64)
+}
+
+func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
+	if hmin := h.MinTime(); hmin > mint {
+		mint = hmin
+	}
+	return &headIndexReader{head: h, mint: mint, maxt: maxt}
+}
+
+// Chunks returns a ChunkReader against the block.
+func (h *Head) Chunks() ChunkReader {
+	return h.chunksRange(math.MinInt64, math.MaxInt64)
+}
+
+func (h *Head) chunksRange(mint, maxt int64) *headChunkReader {
+	if hmin := h.MinTime(); hmin > mint {
+		mint = hmin
+	}
+	return &headChunkReader{head: h, mint: mint, maxt: maxt}
+}
+
+// MinTime returns the lowest time bound on visible data in the head.
+func (h *Head) MinTime() int64 {
+	return atomic.LoadInt64(&h.minTime)
+}
+
+// MaxTime returns the highest timestamp seen in data of the head.
+func (h *Head) MaxTime() int64 {
+	return atomic.LoadInt64(&h.maxTime)
+}
+
 type headChunkReader struct {
-	*HeadBlock
+	head       *Head
+	mint, maxt int64
+}
+
+func (h *headChunkReader) Close() error {
+	return nil
+}
+
+// packChunkID packs a seriesID and a chunkID within it into a global 8 byte ID.
+// It panics if the seriesID exceeds 5 bytes or the chunk ID 3 bytes.
+func packChunkID(seriesID, chunkID uint64) uint64 { + if seriesID > (1<<40)-1 { + panic("series ID exceeds 5 bytes") + } + if chunkID > (1<<24)-1 { + panic("chunk ID exceeds 3 bytes") + } + return (seriesID << 24) | chunkID +} + +func unpackChunkID(id uint64) (seriesID, chunkID uint64) { + return id >> 24, (id << 40) >> 40 } // Chunk returns the chunk for the reference number. func (h *headChunkReader) Chunk(ref uint64) (chunks.Chunk, error) { - h.mtx.RLock() - defer h.mtx.RUnlock() + sid, cid := unpackChunkID(ref) - si := ref >> 32 - ci := (ref << 32) >> 32 + s := h.head.series.getByID(sid) - c := &safeChunk{ - Chunk: h.series[si].chunks[ci].chunk, - s: h.series[si], - i: int(ci), + s.mtx.RLock() + c := s.chunk(int(cid)) + s.mtx.RUnlock() + + // Do not expose chunks that are outside of the specified range. + if c == nil || !intervalOverlap(c.minTime, c.maxTime, h.mint, h.maxt) { + return nil, ErrNotFound } - return c, nil + return &safeChunk{ + Chunk: c.chunk, + s: s, + cid: int(cid), + }, nil } type safeChunk struct { chunks.Chunk - s *memSeries - i int + s *memSeries + cid int } func (c *safeChunk) Iterator() chunks.Iterator { c.s.mtx.RLock() defer c.s.mtx.RUnlock() - return c.s.iterator(c.i) + return c.s.iterator(c.cid) } // func (c *safeChunk) Appender() (chunks.Appender, error) { panic("illegal") } // func (c *safeChunk) Bytes() []byte { panic("illegal") } // func (c *safeChunk) Encoding() chunks.Encoding { panic("illegal") } +type rangeHead struct { + head *Head + mint, maxt int64 +} + +func (h *rangeHead) Index() IndexReader { + return h.head.indexRange(h.mint, h.maxt) +} + +func (h *rangeHead) Chunks() ChunkReader { + return h.head.chunksRange(h.mint, h.maxt) +} + +func (h *rangeHead) Tombstones() TombstoneReader { + return newEmptyTombstoneReader() +} + type headIndexReader struct { - *HeadBlock - // Highest series that existed when the index reader was instantiated. - maxSeries uint32 + head *Head + mint, maxt int64 +} + +func (h *headIndexReader) Close() error { + return nil } func (h *headIndexReader) Symbols() (map[string]struct{}, error) { - return h.symbols, nil + h.head.symMtx.RLock() + defer h.head.symMtx.RUnlock() + + res := make(map[string]struct{}, len(h.head.symbols)) + + for s := range h.head.symbols { + res[s] = struct{}{} + } + return res, nil } // LabelValues returns the possible label values func (h *headIndexReader) LabelValues(names ...string) (StringTuples, error) { - h.mtx.RLock() - defer h.mtx.RUnlock() - if len(names) != 1 { return nil, errInvalidSize } var sl []string - for s := range h.values[names[0]] { + h.head.symMtx.RLock() + defer h.head.symMtx.RUnlock() + + for s := range h.head.values[names[0]] { sl = append(sl, s) } sort.Strings(sl) @@ -619,46 +761,43 @@ func (h *headIndexReader) LabelValues(names ...string) (StringTuples, error) { // Postings returns the postings list iterator for the label pair. func (h *headIndexReader) Postings(name, value string) (Postings, error) { - h.mtx.RLock() - defer h.mtx.RUnlock() - - return h.postings.get(term{name: name, value: value}), nil + return h.head.postings.get(name, value), nil } func (h *headIndexReader) SortedPostings(p Postings) Postings { - h.mtx.RLock() - defer h.mtx.RUnlock() - - ep := make([]uint32, 0, 1024) + ep := make([]uint64, 0, 128) for p.Next() { - // Skip posting entries that include series added after we - // instantiated the index reader. 
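The pack/unpack helpers above are inverses for values within the stated bounds; a quick round-trip sketch with illustrative values:

	// 5-byte series ID, 3-byte chunk ID.
	id := packChunkID(0x123456789A, 0x42)
	sid, cid := unpackChunkID(id)
	// sid == 0x123456789A and cid == 0x42 again; larger inputs
	// would panic in packChunkID rather than silently truncate.
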
- if p.At() > h.maxSeries { - break - } ep = append(ep, p.At()) } if err := p.Err(); err != nil { return errPostings{err: errors.Wrap(err, "expand postings")} } + var err error sort.Slice(ep, func(i, j int) bool { - return labels.Compare(h.series[ep[i]].lset, h.series[ep[j]].lset) < 0 + if err != nil { + return false + } + a := h.head.series.getByID(ep[i]) + b := h.head.series.getByID(ep[j]) + + if a == nil || b == nil { + err = errors.Errorf("series not found") + return false + } + return labels.Compare(a.lset, b.lset) < 0 }) + if err != nil { + return errPostings{err: err} + } return newListPostings(ep) } // Series returns the series for the given reference. -func (h *headIndexReader) Series(ref uint32, lbls *labels.Labels, chks *[]ChunkMeta) error { - h.mtx.RLock() - defer h.mtx.RUnlock() +func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) error { + s := h.head.series.getByID(ref) - if ref > h.maxSeries { - return ErrNotFound - } - - s := h.series[ref] if s == nil { return ErrNotFound } @@ -670,10 +809,14 @@ func (h *headIndexReader) Series(ref uint32, lbls *labels.Labels, chks *[]ChunkM *chks = (*chks)[:0] for i, c := range s.chunks { + // Do not expose chunks that are outside of the specified range. + if !intervalOverlap(c.minTime, c.maxTime, h.mint, h.maxt) { + continue + } *chks = append(*chks, ChunkMeta{ MinTime: c.minTime, MaxTime: c.maxTime, - Ref: (uint64(ref) << 32) | uint64(i), + Ref: packChunkID(s.ref, uint64(s.chunkID(i))), }) } @@ -681,37 +824,35 @@ func (h *headIndexReader) Series(ref uint32, lbls *labels.Labels, chks *[]ChunkM } func (h *headIndexReader) LabelIndices() ([][]string, error) { - h.mtx.RLock() - defer h.mtx.RUnlock() + h.head.symMtx.RLock() + defer h.head.symMtx.RUnlock() res := [][]string{} - for s := range h.values { + for s := range h.head.values { res = append(res, []string{s}) } return res, nil } -// get retrieves the chunk with the hash and label set and creates -// a new one if it doesn't exist yet. -func (h *HeadBlock) get(hash uint64, lset labels.Labels) *memSeries { - series := h.hashes[hash] +func (h *Head) create(hash uint64, lset labels.Labels) *memSeries { + h.metrics.series.Inc() + h.metrics.seriesCreated.Inc() - for _, s := range series { - if s.lset.Equals(lset) { - return s - } + // Optimistically assume that we are the first one to create the series. + id := atomic.AddUint64(&h.lastSeriesID, 1) + s := newMemSeries(lset, id, h.chunkRange) + + s, created := h.series.getOrSet(hash, s) + // Skip indexing if we didn't actually create the series. + if !created { + return s } - return nil -} -func (h *HeadBlock) create(hash uint64, lset labels.Labels) *memSeries { - s := newMemSeries(lset, uint32(len(h.series)), h.meta.MaxTime) + h.postings.add(id, lset) - // Allocate empty space until we can insert at the given index. - h.series = append(h.series, s) - - h.hashes[hash] = append(h.hashes[hash], s) + h.symMtx.Lock() + defer h.symMtx.Unlock() for _, l := range lset { valset, ok := h.values[l.Name] @@ -721,17 +862,179 @@ func (h *HeadBlock) create(hash uint64, lset labels.Labels) *memSeries { } valset.set(l.Value) - h.postings.add(s.ref, term{name: l.Name, value: l.Value}) - h.symbols[l.Name] = struct{}{} h.symbols[l.Value] = struct{}{} } - h.postings.add(s.ref, term{}) - return s } +// seriesHashmap is a simple hashmap for memSeries by their label set. It is built +// on top of a regular hashmap and holds a slice of series to resolve hash collisions. 
+// Its methods require the hash to be submitted with it to avoid re-computations throughout
+// the code.
+type seriesHashmap map[uint64][]*memSeries
+
+func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
+	for _, s := range m[hash] {
+		if s.lset.Equals(lset) {
+			return s
+		}
+	}
+	return nil
+}
+
+func (m seriesHashmap) set(hash uint64, s *memSeries) {
+	l := m[hash]
+	for i, prev := range l {
+		if prev.lset.Equals(s.lset) {
+			l[i] = s
+			return
+		}
+	}
+	m[hash] = append(l, s)
+}
+
+func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
+	var rem []*memSeries
+	for _, s := range m[hash] {
+		if !s.lset.Equals(lset) {
+			rem = append(rem, s)
+		}
+	}
+	if len(rem) == 0 {
+		delete(m, hash)
+	} else {
+		m[hash] = rem
+	}
+}
+
+// stripeSeries locks modulo ranges of IDs and hashes to reduce lock contention.
+// The locks are padded to not be on the same cache line. Filling the padded space
+// with the maps was profiled to be slower – likely due to the additional pointer
+// dereferences.
+type stripeSeries struct {
+	series [stripeSize]map[uint64]*memSeries
+	hashes [stripeSize]seriesHashmap
+	locks  [stripeSize]stripeLock
+}
+
+const (
+	stripeSize = 1 << 14
+	stripeMask = stripeSize - 1
+)
+
+type stripeLock struct {
+	sync.RWMutex
+	// Padding to avoid multiple locks being on the same cache line.
+	_ [40]byte
+}
+
+func newStripeSeries() *stripeSeries {
+	s := &stripeSeries{}
+
+	for i := range s.series {
+		s.series[i] = map[uint64]*memSeries{}
+	}
+	for i := range s.hashes {
+		s.hashes[i] = seriesHashmap{}
+	}
+	return s
+}
+
+// gc garbage collects old chunks that are strictly before mint and removes
+// series entirely that have no chunks left.
+func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) {
+	var (
+		deleted  = map[uint64]struct{}{}
+		rmChunks = 0
+	)
+	// Run through all series and truncate old chunks. Mark those with no
+	// chunks left as deleted and store their ID.
+	for i := 0; i < stripeSize; i++ {
+		s.locks[i].Lock()
+
+		for hash, all := range s.hashes[i] {
+			for _, series := range all {
+				series.mtx.Lock()
+				rmChunks += series.truncateChunksBefore(mint)
+
+				if len(series.chunks) > 0 {
+					series.mtx.Unlock()
+					continue
+				}
+
+				// The series is gone entirely. We need to keep the series lock
+				// and make sure we have acquired the stripe locks for hash and ID of the
+				// series alike.
+				// If we don't hold them all, there's a very small chance that a series receives
+				// samples again while we are half-way into deleting it.
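// Note on the sharding used throughout stripeSeries (including in gc below):
// because stripeSize is a power of two, masking with stripeMask is a cheap
// modulo. A one-line sketch:
//
//	i := hash & stripeMask // equivalent to hash % stripeSize, as stripeSize == 1<<14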
+				j := int(series.ref & stripeMask)
+
+				if i != j {
+					s.locks[j].Lock()
+				}
+
+				deleted[series.ref] = struct{}{}
+				s.hashes[i].del(hash, series.lset)
+				delete(s.series[j], series.ref)
+
+				if i != j {
+					s.locks[j].Unlock()
+				}
+
+				series.mtx.Unlock()
+			}
+		}
+
+		s.locks[i].Unlock()
+	}
+
+	return deleted, rmChunks
+}
+
+func (s *stripeSeries) getByID(id uint64) *memSeries {
+	i := id & stripeMask
+
+	s.locks[i].RLock()
+	series := s.series[i][id]
+	s.locks[i].RUnlock()
+
+	return series
+}
+
+func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries {
+	i := hash & stripeMask
+
+	s.locks[i].RLock()
+	series := s.hashes[i].get(hash, lset)
+	s.locks[i].RUnlock()
+
+	return series
+}
+
+func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) {
+	i := hash & stripeMask
+
+	s.locks[i].Lock()
+
+	if prev := s.hashes[i].get(hash, series.lset); prev != nil {
+		s.locks[i].Unlock()
+		return prev, false
+	}
+	s.hashes[i].set(hash, series)
+	s.locks[i].Unlock()
+
+	i = series.ref & stripeMask
+
+	s.locks[i].Lock()
+	s.series[i][series.ref] = series
+	s.locks[i].Unlock()
+
+	return series, true
+}
+
 type sample struct {
 	t int64
 	v float64
@@ -740,18 +1043,27 @@ type sample struct {
 
 type memSeries struct {
 	mtx sync.RWMutex
 
-	ref    uint32
-	lset   labels.Labels
-	chunks []*memChunk
+	ref          uint64
+	lset         labels.Labels
+	chunks       []*memChunk
+	chunkRange   int64
+	firstChunkID int
 
 	nextAt    int64 // timestamp at which to cut the next chunk.
-	maxt      int64 // maximum timestamp for the series.
 	lastValue float64
 	sampleBuf [4]sample
 
 	app chunks.Appender // Current appender for the chunk.
 }
 
+func (s *memSeries) minTime() int64 {
+	return s.chunks[0].minTime
+}
+
+func (s *memSeries) maxTime() int64 {
+	return s.head().maxTime
+}
+
 func (s *memSeries) cut(mint int64) *memChunk {
 	c := &memChunk{
 		chunk:   chunks.NewXORChunk(),
@@ -768,41 +1080,92 @@ func (s *memSeries) cut(mint int64) *memChunk {
 	return c
 }
 
-func newMemSeries(lset labels.Labels, id uint32, maxt int64) *memSeries {
+func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries {
 	s := &memSeries{
-		lset:   lset,
-		ref:    id,
-		maxt:   maxt,
-		nextAt: math.MinInt64,
+		lset:       lset,
+		ref:        id,
+		chunkRange: chunkRange,
+		nextAt:     math.MinInt64,
 	}
 	return s
 }
 
-func (s *memSeries) append(t int64, v float64) bool {
+// appendable checks whether the given sample is valid for appending to the series.
+func (s *memSeries) appendable(t int64, v float64) error {
+	c := s.head()
+	if c == nil {
+		return nil
+	}
+
+	if t > c.maxTime {
+		return nil
+	}
+	if t < c.maxTime {
+		return ErrOutOfOrderSample
+	}
+	// We are allowing exact duplicates as we can encounter them in valid cases
+	// like federation and erroring out at that time would be extremely noisy.
+	if math.Float64bits(s.lastValue) != math.Float64bits(v) {
+		return ErrAmendSample
+	}
+	return nil
+}
+
+func (s *memSeries) chunk(id int) *memChunk {
+	ix := id - s.firstChunkID
+	if ix < 0 || ix >= len(s.chunks) {
+		return nil
+	}
+	return s.chunks[ix]
+}
+
+func (s *memSeries) chunkID(pos int) int {
+	return pos + s.firstChunkID
+}
+
+// truncateChunksBefore removes all chunks from the series that have no timestamp
+// at or after mint. Chunk IDs remain unchanged.
+func (s *memSeries) truncateChunksBefore(mint int64) (removed int) {
+	var k int
+	for i, c := range s.chunks {
+		if c.maxTime >= mint {
+			break
+		}
+		k = i + 1
+	}
+	s.chunks = append(s.chunks[:0], s.chunks[k:]...)
+ s.firstChunkID += k + + return k +} + +// append adds the sample (t, v) to the series. +func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { const samplesPerChunk = 120 s.mtx.Lock() - defer s.mtx.Unlock() - var c *memChunk + c := s.head() - if len(s.chunks) == 0 { + if c == nil { c = s.cut(t) + chunkCreated = true } - c = s.head() if c.maxTime >= t { - return false + s.mtx.Unlock() + return false, chunkCreated } - if c.samples > samplesPerChunk/4 && t >= s.nextAt { + if c.chunk.NumSamples() > samplesPerChunk/4 && t >= s.nextAt { c = s.cut(t) + chunkCreated = true } s.app.Append(t, v) c.maxTime = t - c.samples++ - if c.samples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.maxt) + if c.chunk.NumSamples() == samplesPerChunk/4 { + _, maxt := rangeForTimestamp(c.minTime, s.chunkRange) + s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, maxt) } s.lastValue = v @@ -812,7 +1175,9 @@ func (s *memSeries) append(t int64, v float64) bool { s.sampleBuf[2] = s.sampleBuf[3] s.sampleBuf[3] = sample{t: t, v: v} - return true + s.mtx.Unlock() + + return true, chunkCreated } // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, @@ -826,30 +1191,33 @@ func computeChunkEndTime(start, cur, max int64) int64 { return start + (max-start)/a } -func (s *memSeries) iterator(i int) chunks.Iterator { - c := s.chunks[i] +func (s *memSeries) iterator(id int) chunks.Iterator { + c := s.chunk(id) - if i < len(s.chunks)-1 { + if id-s.firstChunkID < len(s.chunks)-1 { return c.chunk.Iterator() } - + // Serve the last 4 samples for the last chunk from the series buffer + // as their compressed bytes may be mutated by added samples. it := &memSafeIterator{ Iterator: c.chunk.Iterator(), i: -1, - total: c.samples, + total: c.chunk.NumSamples(), buf: s.sampleBuf, } return it } func (s *memSeries) head() *memChunk { + if len(s.chunks) == 0 { + return nil + } return s.chunks[len(s.chunks)-1] } type memChunk struct { chunk chunks.Chunk minTime, maxTime int64 - samples int } type memSafeIterator struct { diff --git a/vendor/github.com/prometheus/tsdb/index.go b/vendor/github.com/prometheus/tsdb/index.go index e3cce3c00f..ddc2c4f52a 100644 --- a/vendor/github.com/prometheus/tsdb/index.go +++ b/vendor/github.com/prometheus/tsdb/index.go @@ -18,7 +18,6 @@ import ( "encoding/binary" "fmt" "hash" - "hash/crc32" "io" "os" "path/filepath" @@ -100,7 +99,7 @@ type IndexWriter interface { // their labels. // The reference numbers are used to resolve entries in postings lists that // are added later. - AddSeries(ref uint32, l labels.Labels, chunks ...ChunkMeta) error + AddSeries(ref uint64, l labels.Labels, chunks ...ChunkMeta) error // WriteLabelIndex serializes an index from label names to values. // The passed in values chained tuples of strings of the length of names. @@ -131,7 +130,7 @@ type indexWriter struct { uint32s []uint32 symbols map[string]uint32 // symbol offsets - seriesOffsets map[uint32]uint64 // offsets of series + seriesOffsets map[uint64]uint64 // offsets of series labelIndexes []hashEntry // label index offsets postings []hashEntry // postings lists offsets @@ -176,8 +175,8 @@ func newIndexWriter(dir string) (*indexWriter, error) { // Caches. 
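For the chunk-cutting logic in append above: computeChunkEndTime extrapolates, once a quarter of the targeted samples have arrived, when the chunk should end so that chunks align with the range boundary. A worked example with illustrative numbers:

	// 30 samples (a quarter of 120) spanned [0, 99] and the range ends at 1000:
	// a = (1000-0) / ((99-0+1)*4) = 2, so the chunk is cut at 0 + 1000/2 = 500.
	end := computeChunkEndTime(0, 99, 1000) // == 500
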
symbols: make(map[string]uint32, 1<<13), - seriesOffsets: make(map[uint32]uint64, 1<<16), - crc32: crc32.New(crc32.MakeTable(crc32.Castagnoli)), + seriesOffsets: make(map[uint64]uint64, 1<<16), + crc32: newCRC32(), } if err := iw.writeMeta(); err != nil { return nil, err @@ -261,7 +260,7 @@ func (w *indexWriter) writeMeta() error { return w.write(w.buf1.get()) } -func (w *indexWriter) AddSeries(ref uint32, lset labels.Labels, chunks ...ChunkMeta) error { +func (w *indexWriter) AddSeries(ref uint64, lset labels.Labels, chunks ...ChunkMeta) error { if err := w.ensureStage(idxStageSeries); err != nil { return err } @@ -458,7 +457,10 @@ func (w *indexWriter) WritePostings(name, value string, it Postings) error { if !ok { return errors.Errorf("%p series for reference %d not found", w, it.At()) } - refs = append(refs, uint32(offset)) // XXX(fabxc): get uint64 vs uint32 sorted out. + if offset > (1<<32)-1 { + return errors.Errorf("series offset %d exceeds 4 bytes", offset) + } + refs = append(refs, uint32(offset)) } if err := it.Err(); err != nil { return err @@ -525,7 +527,7 @@ type IndexReader interface { // Series populates the given labels and chunk metas for the series identified // by the reference. - Series(ref uint32, lset *labels.Labels, chks *[]ChunkMeta) error + Series(ref uint64, lset *labels.Labels, chks *[]ChunkMeta) error // LabelIndices returns the label pairs for which indices exist. LabelIndices() ([][]string, error) @@ -741,7 +743,7 @@ func (r *indexReader) LabelIndices() ([][]string, error) { return res, nil } -func (r *indexReader) Series(ref uint32, lbls *labels.Labels, chks *[]ChunkMeta) error { +func (r *indexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkMeta) error { d1 := r.decbufAt(int(ref)) d2 := d1.decbuf(int(d1.uvarint())) diff --git a/vendor/github.com/prometheus/tsdb/pool.go b/vendor/github.com/prometheus/tsdb/pool.go new file mode 100644 index 0000000000..7d0f3f6f08 --- /dev/null +++ b/vendor/github.com/prometheus/tsdb/pool.go @@ -0,0 +1,79 @@ +package tsdb + +import "sync" + +type bucketPool struct { + buckets []sync.Pool + sizes []int + new func(sz int) interface{} +} + +func newBucketPool(minSize, maxSize int, factor float64, f func(sz int) interface{}) *bucketPool { + if minSize < 1 { + panic("invalid minimum pool size") + } + if maxSize < 1 { + panic("invalid maximum pool size") + } + if factor < 1 { + panic("invalid factor") + } + + var sizes []int + + for s := minSize; s <= maxSize; s = int(float64(s) * factor) { + sizes = append(sizes, s) + } + + p := &bucketPool{ + buckets: make([]sync.Pool, len(sizes)), + sizes: sizes, + new: f, + } + + return p +} + +func (p *bucketPool) get(sz int) interface{} { + for i, bktSize := range p.sizes { + if sz > bktSize { + continue + } + x := p.buckets[i].Get() + if x == nil { + x = p.new(sz) + } + return x + } + return p.new(sz) +} + +func (p *bucketPool) put(x interface{}, sz int) { + for i, bktSize := range p.sizes { + if sz > bktSize { + continue + } + p.buckets[i].Put(x) + return + } +} + +type poolUint64 struct { + p *bucketPool +} + +func newPoolUint64(minSize, maxSize int, factor float64) poolUint64 { + return poolUint64{ + p: newBucketPool(minSize, maxSize, factor, func(sz int) interface{} { + return make([]uint64, 0, sz) + }), + } +} + +func (p poolUint64) get(sz int) []uint64 { + return p.p.get(sz).([]uint64) +} + +func (p poolUint64) put(x []uint64) { + p.p.put(x[:0], cap(x)) +} diff --git a/vendor/github.com/prometheus/tsdb/postings.go b/vendor/github.com/prometheus/tsdb/postings.go index 
f2f1eb5b8f..97a29ab197 100644 --- a/vendor/github.com/prometheus/tsdb/postings.go +++ b/vendor/github.com/prometheus/tsdb/postings.go @@ -17,31 +17,47 @@ import ( "encoding/binary" "sort" "strings" + "sync" + + "github.com/prometheus/tsdb/labels" ) type memPostings struct { - m map[term][]uint32 + mtx sync.RWMutex + m map[labels.Label][]uint64 } -type term struct { - name, value string +func newMemPostings() *memPostings { + return &memPostings{ + m: make(map[labels.Label][]uint64, 512), + } } // Postings returns an iterator over the postings list for s. -func (p *memPostings) get(t term) Postings { - l := p.m[t] +func (p *memPostings) get(name, value string) Postings { + p.mtx.RLock() + l := p.m[labels.Label{Name: name, Value: value}] + p.mtx.RUnlock() + if l == nil { return emptyPostings } return newListPostings(l) } +var allLabel = labels.Label{} + // add adds a document to the index. The caller has to ensure that no // term argument appears twice. -func (p *memPostings) add(id uint32, terms ...term) { - for _, t := range terms { - p.m[t] = append(p.m[t], id) +func (p *memPostings) add(id uint64, lset labels.Labels) { + p.mtx.Lock() + + for _, l := range lset { + p.m[l] = append(p.m[l], id) } + p.m[allLabel] = append(p.m[allLabel], id) + + p.mtx.Unlock() } // Postings provides iterative access over a postings list. @@ -51,10 +67,10 @@ type Postings interface { // Seek advances the iterator to value v or greater and returns // true if a value was found. - Seek(v uint32) bool + Seek(v uint64) bool // At returns the value at the current iterator position. - At() uint32 + At() uint64 // Err returns the last error of the iterator. Err() error @@ -66,8 +82,8 @@ type errPostings struct { } func (e errPostings) Next() bool { return false } -func (e errPostings) Seek(uint32) bool { return false } -func (e errPostings) At() uint32 { return 0 } +func (e errPostings) Seek(uint64) bool { return false } +func (e errPostings) At() uint64 { return 0 } func (e errPostings) Err() error { return e.err } var emptyPostings = errPostings{} @@ -88,18 +104,18 @@ func Intersect(its ...Postings) Postings { type intersectPostings struct { a, b Postings aok, bok bool - cur uint32 + cur uint64 } func newIntersectPostings(a, b Postings) *intersectPostings { return &intersectPostings{a: a, b: b} } -func (it *intersectPostings) At() uint32 { +func (it *intersectPostings) At() uint64 { return it.cur } -func (it *intersectPostings) doNext(id uint32) bool { +func (it *intersectPostings) doNext(id uint64) bool { for { if !it.b.Seek(id) { return false @@ -125,7 +141,7 @@ func (it *intersectPostings) Next() bool { return it.doNext(it.a.At()) } -func (it *intersectPostings) Seek(id uint32) bool { +func (it *intersectPostings) Seek(id uint64) bool { if !it.a.Seek(id) { return false } @@ -155,14 +171,14 @@ type mergedPostings struct { a, b Postings initialized bool aok, bok bool - cur uint32 + cur uint64 } func newMergedPostings(a, b Postings) *mergedPostings { return &mergedPostings{a: a, b: b} } -func (it *mergedPostings) At() uint32 { +func (it *mergedPostings) At() uint64 { return it.cur } @@ -204,7 +220,7 @@ func (it *mergedPostings) Next() bool { return true } -func (it *mergedPostings) Seek(id uint32) bool { +func (it *mergedPostings) Seek(id uint64) bool { if it.cur >= id { return true } @@ -225,15 +241,15 @@ func (it *mergedPostings) Err() error { // listPostings implements the Postings interface over a plain list. 
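With postings now carried as uint64 IDs, the set operations are unchanged in spirit; a toy sketch using the list-backed implementation defined below (inputs must be sorted):

	a := newListPostings([]uint64{1, 2, 5, 9})
	b := newListPostings([]uint64{2, 5, 7})

	p := Intersect(a, b)
	for p.Next() {
		fmt.Println(p.At()) // prints 2, then 5
	}
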
 type listPostings struct {
-	list []uint32
-	cur  uint32
+	list []uint64
+	cur  uint64
 }
 
-func newListPostings(list []uint32) *listPostings {
+func newListPostings(list []uint64) *listPostings {
 	return &listPostings{list: list}
 }
 
-func (it *listPostings) At() uint32 {
+func (it *listPostings) At() uint64 {
 	return it.cur
 }
 
@@ -247,7 +263,7 @@ func (it *listPostings) Next() bool {
 	return false
 }
 
-func (it *listPostings) Seek(x uint32) bool {
+func (it *listPostings) Seek(x uint64) bool {
 	// If the current value satisfies, then return.
 	if it.cur >= x {
 		return true
@@ -281,8 +297,8 @@ func newBigEndianPostings(list []byte) *bigEndianPostings {
 	return &bigEndianPostings{list: list}
 }
 
-func (it *bigEndianPostings) At() uint32 {
-	return it.cur
+func (it *bigEndianPostings) At() uint64 {
+	return uint64(it.cur)
 }
 
 func (it *bigEndianPostings) Next() bool {
@@ -294,15 +310,15 @@ func (it *bigEndianPostings) Next() bool {
 	return false
 }
 
-func (it *bigEndianPostings) Seek(x uint32) bool {
-	if it.cur >= x {
+func (it *bigEndianPostings) Seek(x uint64) bool {
+	if uint64(it.cur) >= x {
 		return true
 	}
 
 	num := len(it.list) / 4
 	// Do binary search between current position and end.
 	i := sort.Search(num, func(i int) bool {
-		return binary.BigEndian.Uint32(it.list[i*4:]) >= x
+		return binary.BigEndian.Uint32(it.list[i*4:]) >= uint32(x)
 	})
 	if i < num {
 		j := i * 4
diff --git a/vendor/github.com/prometheus/tsdb/querier.go b/vendor/github.com/prometheus/tsdb/querier.go
index 8c2f6cbee2..5461fec89a 100644
--- a/vendor/github.com/prometheus/tsdb/querier.go
+++ b/vendor/github.com/prometheus/tsdb/querier.go
@@ -54,26 +54,6 @@ type querier struct {
 	blocks []Querier
 }
 
-// Querier returns a new querier over the data partition for the given time range.
-// A goroutine must not handle more than one open Querier.
-func (s *DB) Querier(mint, maxt int64) Querier {
-	s.mtx.RLock()
-
-	s.headmtx.RLock()
-	blocks := s.blocksForInterval(mint, maxt)
-	s.headmtx.RUnlock()
-
-	sq := &querier{
-		blocks: make([]Querier, 0, len(blocks)),
-		db:     s,
-	}
-	for _, b := range blocks {
-		sq.blocks = append(sq.blocks, b.Querier(mint, maxt))
-	}
-
-	return sq
-}
-
 func (q *querier) LabelValues(n string) ([]string, error) {
 	return q.lvals(q.blocks, n)
 }
@@ -128,6 +108,18 @@ func (q *querier) Close() error {
 	return merr.Err()
 }
 
+// NewBlockQuerier returns a querier against the given readers.
+func NewBlockQuerier(ir IndexReader, cr ChunkReader, tr TombstoneReader, mint, maxt int64) Querier {
+	return &blockQuerier{
+		index:      ir,
+		chunks:     cr,
+		tombstones: tr,
+
+		mint: mint,
+		maxt: maxt,
+	}
+}
+
 // blockQuerier provides querying access to a single block database.
 type blockQuerier struct {
 	index      IndexReader
@@ -348,6 +340,13 @@ type mergedSeriesSet struct {
 	adone, bdone bool
 }
 
+// NewMergedSeriesSet merges two series sets into a single series set. The input series sets
+// must be sorted and sequential in time, i.e. if they have the same label set,
+// the datapoints of a must be before the datapoints of b.
+func NewMergedSeriesSet(a, b SeriesSet) SeriesSet { + return newMergedSeriesSet(a, b) +} + func newMergedSeriesSet(a, b SeriesSet) *mergedSeriesSet { s := &mergedSeriesSet{a: a, b: b} // Initialize first elements of both sets as Next() needs @@ -403,7 +402,7 @@ func (s *mergedSeriesSet) Next() bool { type chunkSeriesSet interface { Next() bool - At() (labels.Labels, []ChunkMeta, intervals) + At() (labels.Labels, []ChunkMeta, Intervals) Err() error } @@ -417,11 +416,11 @@ type baseChunkSeries struct { lset labels.Labels chks []ChunkMeta - intervals intervals + intervals Intervals err error } -func (s *baseChunkSeries) At() (labels.Labels, []ChunkMeta, intervals) { +func (s *baseChunkSeries) At() (labels.Labels, []ChunkMeta, Intervals) { return s.lset, s.chks, s.intervals } @@ -455,7 +454,7 @@ Outer: // Only those chunks that are not entirely deleted. chks := make([]ChunkMeta, 0, len(s.chks)) for _, chk := range s.chks { - if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) { + if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -482,10 +481,10 @@ type populatedChunkSeries struct { err error chks []ChunkMeta lset labels.Labels - intervals intervals + intervals Intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []ChunkMeta, intervals) { +func (s *populatedChunkSeries) At() (labels.Labels, []ChunkMeta, Intervals) { return s.lset, s.chks, s.intervals } func (s *populatedChunkSeries) Err() error { return s.err } @@ -570,7 +569,7 @@ type chunkSeries struct { mint, maxt int64 - intervals intervals + intervals Intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -676,11 +675,12 @@ type chunkSeriesIterator struct { maxt, mint int64 - intervals intervals + intervals Intervals } -func newChunkSeriesIterator(cs []ChunkMeta, dranges intervals, mint, maxt int64) *chunkSeriesIterator { +func newChunkSeriesIterator(cs []ChunkMeta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { it := cs[0].Chunk.Iterator() + if len(dranges) > 0 { it = &deletedIterator{it: it, intervals: dranges} } @@ -731,19 +731,22 @@ func (it *chunkSeriesIterator) At() (t int64, v float64) { } func (it *chunkSeriesIterator) Next() bool { - for it.cur.Next() { + if it.cur.Next() { t, _ := it.cur.At() - if t < it.mint { - return it.Seek(it.mint) - } + if t < it.mint { + if !it.Seek(it.mint) { + return false + } + t, _ = it.At() + + return t <= it.maxt + } if t > it.maxt { return false } - return true } - if err := it.cur.Err(); err != nil { return false } diff --git a/vendor/github.com/prometheus/tsdb/tombstones.go b/vendor/github.com/prometheus/tsdb/tombstones.go index 612b3029fe..7b24407b53 100644 --- a/vendor/github.com/prometheus/tsdb/tombstones.go +++ b/vendor/github.com/prometheus/tsdb/tombstones.go @@ -16,7 +16,6 @@ package tsdb import ( "encoding/binary" "fmt" - "hash/crc32" "io" "io/ioutil" "os" @@ -34,10 +33,15 @@ const ( tombstoneFormatV1 = 1 ) +// TombstoneReader is the iterator over tombstones. 
+type TombstoneReader interface { + Get(ref uint64) Intervals +} + func writeTombstoneFile(dir string, tr tombstoneReader) error { path := filepath.Join(dir, tombstoneFilename) tmp := path + ".tmp" - hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + hash := newCRC32() f, err := os.Create(tmp) if err != nil { @@ -60,9 +64,9 @@ func writeTombstoneFile(dir string, tr tombstoneReader) error { for k, v := range tr { for _, itv := range v { buf.reset() - buf.putUvarint32(k) - buf.putVarint64(itv.mint) - buf.putVarint64(itv.maxt) + buf.putUvarint64(k) + buf.putVarint64(itv.Mint) + buf.putVarint64(itv.Maxt) _, err = mw.Write(buf.get()) if err != nil { @@ -82,13 +86,8 @@ func writeTombstoneFile(dir string, tr tombstoneReader) error { // Stone holds the information on the posting and time-range // that is deleted. type Stone struct { - ref uint32 - intervals intervals -} - -// TombstoneReader is the iterator over tombstones. -type TombstoneReader interface { - Get(ref uint32) intervals + ref uint64 + intervals Intervals } func readTombstones(dir string) (tombstoneReader, error) { @@ -114,7 +113,7 @@ func readTombstones(dir string) (tombstoneReader, error) { } // Verify checksum - hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + hash := newCRC32() if _, err := hash.Write(d.get()); err != nil { return nil, errors.Wrap(err, "write to hash") } @@ -124,48 +123,49 @@ func readTombstones(dir string) (tombstoneReader, error) { stonesMap := newEmptyTombstoneReader() for d.len() > 0 { - k := d.uvarint32() + k := d.uvarint64() mint := d.varint64() maxt := d.varint64() if d.err() != nil { return nil, d.err() } - stonesMap.add(k, interval{mint, maxt}) + stonesMap.add(k, Interval{mint, maxt}) } return newTombstoneReader(stonesMap), nil } -type tombstoneReader map[uint32]intervals +type tombstoneReader map[uint64]Intervals -func newTombstoneReader(ts map[uint32]intervals) tombstoneReader { +func newTombstoneReader(ts map[uint64]Intervals) tombstoneReader { return tombstoneReader(ts) } func newEmptyTombstoneReader() tombstoneReader { - return tombstoneReader(make(map[uint32]intervals)) + return tombstoneReader(make(map[uint64]Intervals)) } -func (t tombstoneReader) Get(ref uint32) intervals { +func (t tombstoneReader) Get(ref uint64) Intervals { return t[ref] } -func (t tombstoneReader) add(ref uint32, itv interval) { +func (t tombstoneReader) add(ref uint64, itv Interval) { t[ref] = t[ref].add(itv) } -type interval struct { - mint, maxt int64 +// Interval represents a single time-interval. +type Interval struct { + Mint, Maxt int64 } -func (tr interval) inBounds(t int64) bool { - return t >= tr.mint && t <= tr.maxt +func (tr Interval) inBounds(t int64) bool { + return t >= tr.Mint && t <= tr.Maxt } -func (tr interval) isSubrange(dranges intervals) bool { +func (tr Interval) isSubrange(dranges Intervals) bool { for _, r := range dranges { - if r.inBounds(tr.mint) && r.inBounds(tr.maxt) { + if r.inBounds(tr.Mint) && r.inBounds(tr.Maxt) { return true } } @@ -173,43 +173,44 @@ func (tr interval) isSubrange(dranges intervals) bool { return false } -type intervals []interval +// Intervals represents a set of increasing and non-overlapping time-intervals. +type Intervals []Interval // This adds the new time-range to the existing ones. // The existing ones must be sorted. -func (itvs intervals) add(n interval) intervals { +func (itvs Intervals) add(n Interval) Intervals { for i, r := range itvs { // TODO(gouthamve): Make this codepath easier to digest. 
- if r.inBounds(n.mint-1) || r.inBounds(n.mint) { - if n.maxt > r.maxt { - itvs[i].maxt = n.maxt + if r.inBounds(n.Mint-1) || r.inBounds(n.Mint) { + if n.Maxt > r.Maxt { + itvs[i].Maxt = n.Maxt } j := 0 for _, r2 := range itvs[i+1:] { - if n.maxt < r2.mint { + if n.Maxt < r2.Mint { break } j++ } if j != 0 { - if itvs[i+j].maxt > n.maxt { - itvs[i].maxt = itvs[i+j].maxt + if itvs[i+j].Maxt > n.Maxt { + itvs[i].Maxt = itvs[i+j].Maxt } itvs = append(itvs[:i+1], itvs[i+j+1:]...) } return itvs } - if r.inBounds(n.maxt+1) || r.inBounds(n.maxt) { - if n.mint < r.maxt { - itvs[i].mint = n.mint + if r.inBounds(n.Maxt+1) || r.inBounds(n.Maxt) { + if n.Mint < r.Maxt { + itvs[i].Mint = n.Mint } return itvs } - if n.mint < r.mint { - newRange := make(intervals, i, len(itvs[:i])+1) + if n.Mint < r.Mint { + newRange := make(Intervals, i, len(itvs[:i])+1) copy(newRange, itvs[:i]) newRange = append(newRange, n) newRange = append(newRange, itvs[i:]...) diff --git a/vendor/github.com/prometheus/tsdb/wal.go b/vendor/github.com/prometheus/tsdb/wal.go index 50ddb6e346..1dadc8f2c3 100644 --- a/vendor/github.com/prometheus/tsdb/wal.go +++ b/vendor/github.com/prometheus/tsdb/wal.go @@ -16,11 +16,14 @@ package tsdb import ( "bufio" "encoding/binary" + "fmt" "hash" "hash/crc32" "io" "math" "os" + "path/filepath" + "sort" "sync" "time" @@ -53,50 +56,75 @@ const ( type SamplesCB func([]RefSample) error // SeriesCB is the callback after reading series. -type SeriesCB func([]labels.Labels) error +type SeriesCB func([]RefSeries) error // DeletesCB is the callback after reading deletes. type DeletesCB func([]Stone) error -// SegmentWAL is a write ahead log for series data. -type SegmentWAL struct { - mtx sync.Mutex - - dirFile *os.File - files []*os.File - - logger log.Logger - flushInterval time.Duration - segmentSize int64 - - crc32 hash.Hash32 - cur *bufio.Writer - curN int64 - - stopc chan struct{} - donec chan struct{} -} - // WAL is a write ahead log that can log new series labels and samples. // It must be completely read before new entries are logged. type WAL interface { Reader() WALReader - LogSeries([]labels.Labels) error + LogSeries([]RefSeries) error LogSamples([]RefSample) error LogDeletes([]Stone) error + Truncate(int64, Postings) error Close() error } +// NopWAL is a WAL that does nothing. +func NopWAL() WAL { + return nopWAL{} +} + +type nopWAL struct{} + +func (nopWAL) Read(SeriesCB, SamplesCB, DeletesCB) error { return nil } +func (w nopWAL) Reader() WALReader { return w } +func (nopWAL) LogSeries([]RefSeries) error { return nil } +func (nopWAL) LogSamples([]RefSample) error { return nil } +func (nopWAL) LogDeletes([]Stone) error { return nil } +func (nopWAL) Truncate(int64, Postings) error { return nil } +func (nopWAL) Close() error { return nil } + // WALReader reads entries from a WAL. type WALReader interface { Read(SeriesCB, SamplesCB, DeletesCB) error } +// RefSeries is the series labels with the series ID. +type RefSeries struct { + Ref uint64 + Labels labels.Labels + + // hash for the label set. This field is not generally populated. + hash uint64 +} + // RefSample is a timestamp/value pair associated with a reference to a series. type RefSample struct { Ref uint64 T int64 V float64 + + series *memSeries +} + +// segmentFile wraps a file object of a segment and tracks the highest timestamp +// it contains. During WAL truncating, all segments with no higher timestamp than +// the truncation threshold can be compacted. 
+type segmentFile struct {
+	*os.File
+	maxTime   int64  // highest tombstone or sample timestamp in segment
+	minSeries uint64 // lowest series ID in segment
+}
+
+func newSegmentFile(f *os.File) *segmentFile {
+	return &segmentFile{
+		File:      f,
+		maxTime:   math.MinInt64,
+		minSeries: math.MaxUint64,
+	}
+}
 
 const (
@@ -112,6 +140,32 @@ func init() {
 	castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
 }
 
+// newCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the
+// polynomial may be easily changed in one location at a later time, if necessary.
+func newCRC32() hash.Hash32 {
+	return crc32.New(castagnoliTable)
+}
+
+// SegmentWAL is a write ahead log for series data.
+type SegmentWAL struct {
+	mtx sync.Mutex
+
+	dirFile *os.File
+	files   []*segmentFile
+
+	logger        log.Logger
+	flushInterval time.Duration
+	segmentSize   int64
+
+	crc32 hash.Hash32
+	cur   *bufio.Writer
+	curN  int64
+
+	stopc   chan struct{}
+	donec   chan struct{}
+	buffers sync.Pool
+}
+
 // OpenSegmentWAL opens or creates a write ahead log in the given directory.
 // The WAL must be read completely before new data is written.
 func OpenSegmentWAL(dir string, logger log.Logger, flushInterval time.Duration) (*SegmentWAL, error) {
@@ -133,157 +187,379 @@ func OpenSegmentWAL(dir string, logger log.Logger, flushInterval time.Duration)
 		donec:         make(chan struct{}),
 		stopc:         make(chan struct{}),
 		segmentSize:   walSegmentSizeBytes,
-		crc32:         crc32.New(castagnoliTable),
+		crc32:         newCRC32(),
 	}
-	if err := w.initSegments(); err != nil {
+
+	fns, err := sequenceFiles(w.dirFile.Name())
+	if err != nil {
 		return nil, err
 	}
+	for _, fn := range fns {
+		f, err := w.openSegmentFile(fn)
+		if err != nil {
+			return nil, err
+		}
+		w.files = append(w.files, newSegmentFile(f))
+	}
 
 	go w.run(flushInterval)
 
 	return w, nil
 }
 
+// repairingWALReader wraps a WAL reader and truncates its underlying SegmentWAL after the last
+// valid entry if it encounters corruption.
+type repairingWALReader struct {
+	wal *SegmentWAL
+	r   WALReader
+}
+
+func (r *repairingWALReader) Read(series SeriesCB, samples SamplesCB, deletes DeletesCB) error {
+	err := r.r.Read(series, samples, deletes)
+	if err == nil {
+		return nil
+	}
+	cerr, ok := err.(walCorruptionErr)
+	if !ok {
+		return err
+	}
+	return r.wal.truncate(cerr.err, cerr.file, cerr.lastOffset)
+}
+
+// truncate the WAL after the last valid entry.
+func (w *SegmentWAL) truncate(err error, file int, lastOffset int64) error {
+	w.logger.Log("msg", "WAL corruption detected; truncating",
+		"err", err, "file", w.files[file].Name(), "pos", lastOffset)
+
+	// Close and delete all files after the current one.
+	for _, f := range w.files[file+1:] {
+		if err := f.Close(); err != nil {
+			return err
+		}
+		if err := os.Remove(f.Name()); err != nil {
+			return err
+		}
+	}
+	w.mtx.Lock()
+	defer w.mtx.Unlock()
+
+	w.files = w.files[:file+1]
+
+	// Seek the current file to the last valid offset where we continue writing from.
+	_, err = w.files[file].Seek(lastOffset, os.SEEK_SET)
+	return err
+}
+
 // Reader returns a new reader over the write ahead log data.
 // It must be completely consumed before writing to the WAL.
 func (w *SegmentWAL) Reader() WALReader {
-	return newWALReader(w, w.logger)
+	return &repairingWALReader{
+		wal: w,
+		r:   newWALReader(w.files, w.logger),
+	}
 }
 
-// Log writes a batch of new series labels and samples to the log.
-//func (w *SegmentWAL) Log(series []labels.Labels, samples []RefSample) error {
-//return nil
-//}
+func (w *SegmentWAL) getBuffer() *encbuf {
+	b := w.buffers.Get()
+	if b == nil {
+		return &encbuf{b: make([]byte, 0, 64*1024)}
+	}
+	return b.(*encbuf)
+}
 
-// LogSeries writes a batch of new series labels to the log.
-func (w *SegmentWAL) LogSeries(series []labels.Labels) error {
-	if err := w.encodeSeries(series); err != nil {
+func (w *SegmentWAL) putBuffer(b *encbuf) {
+	b.reset()
+	w.buffers.Put(b)
+}
+
+// Truncate deletes the values prior to mint and the series entries not in p.
+func (w *SegmentWAL) Truncate(mint int64, p Postings) error {
+	// The last segment is always active.
+	if len(w.files) < 2 {
+		return nil
+	}
+	var candidates []*segmentFile
+
+	// All files have to be traversed as there could be two segments for a block
+	// with the first one having times (10000, 20000) and the second one having (0, 10000).
+	for _, sf := range w.files[:len(w.files)-1] {
+		if sf.maxTime >= mint {
+			break
+		}
+		// Past WAL files are closed. We have to reopen them for another read.
+		f, err := w.openSegmentFile(sf.Name())
+		if err != nil {
+			return errors.Wrap(err, "open old WAL segment for read")
+		}
+		candidates = append(candidates, &segmentFile{
+			File:      f,
+			minSeries: sf.minSeries,
+			maxTime:   sf.maxTime,
+		})
+	}
+	if len(candidates) == 0 {
+		return nil
+	}
+
+	r := newWALReader(candidates, w.logger)
+
+	// Create a new tmp file.
+	f, err := w.createSegmentFile(filepath.Join(w.dirFile.Name(), "compact.tmp"))
+	if err != nil {
+		return errors.Wrap(err, "create compaction segment")
+	}
+	var (
+		csf          = newSegmentFile(f)
+		crc32        = newCRC32()
+		activeSeries = []RefSeries{}
+	)
+
+Loop:
+	for r.next() {
+		rt, flag, byt := r.at()
+
+		if rt != WALEntrySeries {
+			continue
+		}
+		series, err := r.decodeSeries(flag, byt)
+		if err != nil {
+			return errors.Wrap(err, "decode series while truncating")
+		}
+		activeSeries = activeSeries[:0]
+
+		for _, s := range series {
+			if !p.Seek(s.Ref) {
+				break Loop
+			}
+			if p.At() == s.Ref {
+				activeSeries = append(activeSeries, s)
+			}
+		}
+
+		buf := w.getBuffer()
+		flag = w.encodeSeries(buf, activeSeries)
+
+		_, err = w.writeTo(csf, crc32, WALEntrySeries, flag, buf.get())
+		w.putBuffer(buf)
+
+		if err != nil {
+			return err
+		}
+	}
+	if r.Err() != nil {
+		return errors.Wrap(r.Err(), "read candidate WAL files")
+	}
+
+	off, err := csf.Seek(0, os.SEEK_CUR)
+	if err != nil {
+		return err
+	}
+	if err := csf.Truncate(off); err != nil {
+		return err
+	}
+	csf.Sync()
+	csf.Close()
+
+	if err := renameFile(csf.Name(), candidates[0].Name()); err != nil {
+		return err
+	}
+	for _, f := range candidates[1:] {
+		if err := os.RemoveAll(f.Name()); err != nil {
+			return errors.Wrap(err, "delete WAL segment file")
+		}
+		f.Close()
+	}
+	if err := w.dirFile.Sync(); err != nil {
 		return err
 	}
+	// The file object of csf still holds the name before rename. Recreate it so
+	// subsequent truncations do not look at a non-existent file name.
+	csf.File, err = w.openSegmentFile(candidates[0].Name())
+	if err != nil {
+		return err
+	}
+	// We don't need it to be open.
+	csf.Close()
+
+	w.mtx.Lock()
+	w.files = append([]*segmentFile{csf}, w.files[len(candidates):]...)
+	w.mtx.Unlock()
+
+	return nil
+}
+
+// LogSeries writes a batch of new series labels to the log.
+// The series have to be ordered.
+func (w *SegmentWAL) LogSeries(series []RefSeries) error {
+	buf := w.getBuffer()
+
+	flag := w.encodeSeries(buf, series)
+	err := w.write(WALEntrySeries, flag, buf.get())
+
+	w.putBuffer(buf)
+
+	if err != nil {
+		return errors.Wrap(err, "log series")
+	}
+
+	tf := w.head()
+
+	for _, s := range series {
+		if tf.minSeries > s.Ref {
+			tf.minSeries = s.Ref
+		}
+	}
+
 	if w.flushInterval <= 0 {
-		return w.Sync()
+		return errors.Wrap(w.Sync(), "sync")
 	}
 	return nil
 }
 
 // LogSamples writes a batch of new samples to the log.
 func (w *SegmentWAL) LogSamples(samples []RefSample) error {
-	if err := w.encodeSamples(samples); err != nil {
-		return err
+	buf := w.getBuffer()
+
+	flag := w.encodeSamples(buf, samples)
+	err := w.write(WALEntrySamples, flag, buf.get())
+
+	w.putBuffer(buf)
+
+	if err != nil {
+		return errors.Wrap(err, "log samples")
+	}
+	tf := w.head()
+
+	for _, s := range samples {
+		if tf.maxTime < s.T {
+			tf.maxTime = s.T
+		}
 	}
 
 	if w.flushInterval <= 0 {
-		return w.Sync()
+		return errors.Wrap(w.Sync(), "sync")
 	}
 	return nil
 }
 
 // LogDeletes writes a batch of new deletes to the log.
 func (w *SegmentWAL) LogDeletes(stones []Stone) error {
-	if err := w.encodeDeletes(stones); err != nil {
-		return err
+	buf := w.getBuffer()
+
+	flag := w.encodeDeletes(buf, stones)
+	err := w.write(WALEntryDeletes, flag, buf.get())
+
+	w.putBuffer(buf)
+
+	if err != nil {
+		return errors.Wrap(err, "log deletes")
+	}
+	tf := w.head()
+
+	for _, s := range stones {
+		for _, iv := range s.intervals {
+			if tf.maxTime < iv.Maxt {
+				tf.maxTime = iv.Maxt
+			}
+		}
+	}
 
 	if w.flushInterval <= 0 {
-		return w.Sync()
+		return errors.Wrap(w.Sync(), "sync")
 	}
 	return nil
 }
 
-// initSegments finds all existing segment files and opens them in the
-// appropriate file modes.
-func (w *SegmentWAL) initSegments() error {
-	fns, err := sequenceFiles(w.dirFile.Name(), "")
-	if err != nil {
-		return err
-	}
-	if len(fns) == 0 {
-		return nil
-	}
+// openSegmentFile opens the given segment file and consumes and validates its header.
+func (w *SegmentWAL) openSegmentFile(name string) (*os.File, error) {
 	// We must open all files in read/write mode as we may have to truncate along
-	// the way and any file may become the tail.
-	for _, fn := range fns {
-		f, err := os.OpenFile(fn, os.O_RDWR, 0666)
-		if err != nil {
-			return err
-		}
-		w.files = append(w.files, f)
+	// the way and any file may become the head.
+	f, err := os.OpenFile(name, os.O_RDWR, 0666)
+	if err != nil {
+		return nil, err
+	}
+	metab := make([]byte, 8)
+
+	if n, err := f.Read(metab); err != nil {
+		return nil, errors.Wrapf(err, "validate meta %q", f.Name())
+	} else if n != 8 {
+		return nil, errors.Errorf("invalid header size %d in %q", n, f.Name())
 	}
 
-	// Consume and validate meta headers.
- for _, f := range w.files { - metab := make([]byte, 8) - - if n, err := f.Read(metab); err != nil { - return errors.Wrapf(err, "validate meta %q", f.Name()) - } else if n != 8 { - return errors.Errorf("invalid header size %d in %q", n, f.Name()) - } - - if m := binary.BigEndian.Uint32(metab[:4]); m != WALMagic { - return errors.Errorf("invalid magic header %x in %q", m, f.Name()) - } - if metab[4] != WALFormatDefault { - return errors.Errorf("unknown WAL segment format %d in %q", metab[4], f.Name()) - } + if m := binary.BigEndian.Uint32(metab[:4]); m != WALMagic { + return nil, errors.Errorf("invalid magic header %x in %q", m, f.Name()) } - - return nil + if metab[4] != WALFormatDefault { + return nil, errors.Errorf("unknown WAL segment format %d in %q", metab[4], f.Name()) + } + return f, nil } -// cut finishes the currently active segments and opens the next one. -// The encoder is reset to point to the new segment. -func (w *SegmentWAL) cut() error { - // Sync current tail to disk and close. - if tf := w.tail(); tf != nil { - if err := w.sync(); err != nil { - return err - } - off, err := tf.Seek(0, os.SEEK_CUR) - if err != nil { - return err - } - if err := tf.Truncate(off); err != nil { - return err - } - if err := tf.Close(); err != nil { - return err - } - } - - p, _, err := nextSequenceFile(w.dirFile.Name(), "") +// createSegmentFile creates a new segment file with the given name. It preallocates +// the standard segment size if possible and writes the header. +func (w *SegmentWAL) createSegmentFile(name string) (*os.File, error) { + f, err := os.Create(name) if err != nil { - return err - } - f, err := os.Create(p) - if err != nil { - return err + return nil, err } if err = fileutil.Preallocate(f, w.segmentSize, true); err != nil { - return err + return nil, err } - if err = w.dirFile.Sync(); err != nil { - return err - } - // Write header metadata for new file. metab := make([]byte, 8) binary.BigEndian.PutUint32(metab[:4], WALMagic) metab[4] = WALFormatDefault if _, err := f.Write(metab); err != nil { + return nil, err + } + return f, err +} + +// cut finishes the currently active segments and opens the next one. +// The encoder is reset to point to the new segment. +func (w *SegmentWAL) cut() error { + // Sync current head to disk and close. + if hf := w.head(); hf != nil { + if err := w.sync(); err != nil { + return err + } + off, err := hf.Seek(0, os.SEEK_CUR) + if err != nil { + return err + } + if err := hf.Truncate(off); err != nil { + return err + } + if err := hf.Close(); err != nil { + return err + } + } + + p, _, err := nextSequenceFile(w.dirFile.Name()) + if err != nil { + return err + } + f, err := w.createSegmentFile(p) + if err != nil { return err } - w.files = append(w.files, f) - w.cur = bufio.NewWriterSize(f, 4*1024*1024) + if err = w.dirFile.Sync(); err != nil { + return err + } + + w.files = append(w.files, newSegmentFile(f)) + + // TODO(gouthamve): make the buffer size a constant. + w.cur = bufio.NewWriterSize(f, 8*1024*1024) w.curN = 8 return nil } -func (w *SegmentWAL) tail() *os.File { +func (w *SegmentWAL) head() *segmentFile { if len(w.files) == 0 { return nil } @@ -292,20 +568,40 @@ func (w *SegmentWAL) tail() *os.File { // Sync flushes the changes to disk. func (w *SegmentWAL) Sync() error { - w.mtx.Lock() - defer w.mtx.Unlock() + var head *segmentFile + var err error - return w.sync() + // Flush the writer and retrieve the reference to the head segment under mutex lock. 
+ func() { + w.mtx.Lock() + defer w.mtx.Unlock() + if err = w.flush(); err != nil { + return + } + head = w.head() + }() + if err != nil { + return errors.Wrap(err, "flush buffer") + } + if head != nil { + // But only fsync the head segment after releasing the mutex as it will block on disk I/O. + return fileutil.Fdatasync(head.File) + } + return nil } func (w *SegmentWAL) sync() error { + if err := w.flush(); err != nil { + return err + } + return fileutil.Fdatasync(w.head().File) +} + +func (w *SegmentWAL) flush() error { if w.cur == nil { return nil } - if err := w.cur.Flush(); err != nil { - return err - } - return fileutil.Fdatasync(w.tail()) + return w.cur.Flush() } func (w *SegmentWAL) run(interval time.Duration) { @@ -335,17 +631,16 @@ func (w *SegmentWAL) Close() error { close(w.stopc) <-w.donec - // Lock mutex and leave it locked so we panic if there's a bug causing - // the block to be used afterwards. w.mtx.Lock() + defer w.mtx.Unlock() if err := w.sync(); err != nil { return err } // On opening, a WAL must be fully consumed once. Afterwards // only the current segment will still be open. - if tf := w.tail(); tf != nil { - return errors.Wrapf(tf.Close(), "closing WAL tail %s", tf.Name()) + if hf := w.head(); hf != nil { + return errors.Wrapf(hf.Close(), "closing WAL head %s", hf.Name()) } return nil } @@ -359,15 +654,14 @@ const ( walPageBytes = 16 * minSectorSize ) -func (w *SegmentWAL) entry(et WALEntryType, flag byte, buf []byte) error { +func (w *SegmentWAL) write(t WALEntryType, flag uint8, buf []byte) error { w.mtx.Lock() defer w.mtx.Unlock() - // Cut to the next segment if the entry exceeds the file size unless it would also // exceed the size of a new segment. + // TODO(gouthamve): Add a test for this case where the commit is greater than segmentSize. var ( - // 6-byte header + 4-byte CRC32 + buf. - sz = int64(6 + 4 + len(buf)) + sz = int64(len(buf)) + 6 newsz = w.curN + sz ) // XXX(fabxc): this currently cuts a new file whenever the WAL was newly opened. 
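
Note on the hunks below: the write path is reworked around writeTo, but the on-disk entry framing of the WAL stays the same throughout this patch. Each entry is a 6-byte header carrying the entry type, a flag byte, and the payload length as a big-endian uint32, followed by the payload and a 4-byte CRC32 (Castagnoli polynomial) computed over header and payload. The following is a minimal standalone sketch of that framing, for illustration only; frameEntry and its names are invented for the example and are not part of the patch:

    package main

    import (
    	"bytes"
    	"encoding/binary"
    	"fmt"
    	"hash/crc32"
    )

    var castagnoli = crc32.MakeTable(crc32.Castagnoli)

    // frameEntry lays out one WAL entry: a 6-byte header (entry type, flag,
    // big-endian payload length), the payload, and a CRC32 over header+payload.
    func frameEntry(entryType, flag byte, payload []byte) []byte {
    	var head [6]byte
    	head[0] = entryType
    	head[1] = flag
    	binary.BigEndian.PutUint32(head[2:], uint32(len(payload)))

    	crc := crc32.New(castagnoli)
    	crc.Write(head[:]) // hash.Hash writes never return an error
    	crc.Write(payload)

    	var out bytes.Buffer
    	out.Write(head[:])
    	out.Write(payload)
    	out.Write(crc.Sum(nil)) // appends the 4 checksum bytes, MSB first
    	return out.Bytes()
    }

    func main() {
    	fmt.Printf("% x\n", frameEntry(2, 1, []byte("example payload")))
    }

Because the checksum covers the header as well as the payload, the reader further below can turn any length, type, or CRC mismatch into a walCorruptionErr and truncate the log after the last valid entry.
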
@@ -377,30 +671,37 @@ func (w *SegmentWAL) entry(et WALEntryType, flag byte, buf []byte) error { return err } } + n, err := w.writeTo(w.cur, w.crc32, t, flag, buf) - w.crc32.Reset() - wr := io.MultiWriter(w.crc32, w.cur) + w.curN += int64(n) - b := make([]byte, 6) - b[0] = byte(et) + return err +} + +func (w *SegmentWAL) writeTo(wr io.Writer, crc32 hash.Hash, t WALEntryType, flag uint8, buf []byte) (int, error) { + if len(buf) == 0 { + return 0, nil + } + crc32.Reset() + wr = io.MultiWriter(crc32, wr) + + var b [6]byte + b[0] = byte(t) b[1] = flag binary.BigEndian.PutUint32(b[2:], uint32(len(buf))) - if _, err := wr.Write(b); err != nil { - return err + n1, err := wr.Write(b[:]) + if err != nil { + return n1, err } - if _, err := wr.Write(buf); err != nil { - return err - } - if _, err := w.cur.Write(w.crc32.Sum(nil)); err != nil { - return err + n2, err := wr.Write(buf) + if err != nil { + return n1 + n2, err } + n3, err := wr.Write(crc32.Sum(b[:0])) - w.curN += sz - - putWALBuffer(buf) - return nil + return n1 + n2 + n3, err } const ( @@ -409,122 +710,77 @@ const ( walDeletesSimple = 1 ) -var walBuffers = sync.Pool{} +func (w *SegmentWAL) encodeSeries(buf *encbuf, series []RefSeries) uint8 { + for _, s := range series { + buf.putBE64(s.Ref) + buf.putUvarint(len(s.Labels)) -func getWALBuffer() []byte { - b := walBuffers.Get() - if b == nil { - return make([]byte, 0, 64*1024) - } - return b.([]byte) -} - -func putWALBuffer(b []byte) { - b = b[:0] - walBuffers.Put(b) -} - -func (w *SegmentWAL) encodeSeries(series []labels.Labels) error { - if len(series) == 0 { - return nil - } - - b := make([]byte, binary.MaxVarintLen32) - buf := getWALBuffer() - - for _, lset := range series { - n := binary.PutUvarint(b, uint64(len(lset))) - buf = append(buf, b[:n]...) - - for _, l := range lset { - n = binary.PutUvarint(b, uint64(len(l.Name))) - buf = append(buf, b[:n]...) - buf = append(buf, l.Name...) - - n = binary.PutUvarint(b, uint64(len(l.Value))) - buf = append(buf, b[:n]...) - buf = append(buf, l.Value...) + for _, l := range s.Labels { + buf.putUvarintStr(l.Name) + buf.putUvarintStr(l.Value) } } - - return w.entry(WALEntrySeries, walSeriesSimple, buf) + return walSeriesSimple } -func (w *SegmentWAL) encodeSamples(samples []RefSample) error { +func (w *SegmentWAL) encodeSamples(buf *encbuf, samples []RefSample) uint8 { if len(samples) == 0 { - return nil + return walSamplesSimple } - - b := make([]byte, binary.MaxVarintLen64) - buf := getWALBuffer() - // Store base timestamp and base reference number of first sample. // All samples encode their timestamp and ref as delta to those. // // TODO(fabxc): optimize for all samples having the same timestamp. first := samples[0] - binary.BigEndian.PutUint64(b, first.Ref) - buf = append(buf, b[:8]...) - binary.BigEndian.PutUint64(b, uint64(first.T)) - buf = append(buf, b[:8]...) + buf.putBE64(first.Ref) + buf.putBE64int64(first.T) for _, s := range samples { - n := binary.PutVarint(b, int64(s.Ref)-int64(first.Ref)) - buf = append(buf, b[:n]...) - - n = binary.PutVarint(b, s.T-first.T) - buf = append(buf, b[:n]...) - - binary.BigEndian.PutUint64(b, math.Float64bits(s.V)) - buf = append(buf, b[:8]...) 
+		buf.putVarint64(int64(s.Ref) - int64(first.Ref))
+		buf.putVarint64(s.T - first.T)
+		buf.putBE64(math.Float64bits(s.V))
 	}
-
-	return w.entry(WALEntrySamples, walSamplesSimple, buf)
+	return walSamplesSimple
 }
 
-func (w *SegmentWAL) encodeDeletes(stones []Stone) error {
-	b := make([]byte, 2*binary.MaxVarintLen64)
-	eb := &encbuf{b: b}
-	buf := getWALBuffer()
+func (w *SegmentWAL) encodeDeletes(buf *encbuf, stones []Stone) uint8 {
 	for _, s := range stones {
-		for _, itv := range s.intervals {
-			eb.reset()
-			eb.putUvarint32(s.ref)
-			eb.putVarint64(itv.mint)
-			eb.putVarint64(itv.maxt)
-			buf = append(buf, eb.get()...)
+		for _, iv := range s.intervals {
+			buf.putBE64(s.ref)
+			buf.putVarint64(iv.Mint)
+			buf.putVarint64(iv.Maxt)
 		}
 	}
-
-	return w.entry(WALEntryDeletes, walDeletesSimple, buf)
+	return walDeletesSimple
 }
 
 // walReader decodes and emits write ahead log entries.
 type walReader struct {
 	logger log.Logger
 
-	wal *SegmentWAL
+	files []*segmentFile
 	cur   int
 	buf   []byte
 	crc32 hash.Hash32
 
-	curType WALEntryType
-	curFlag byte
-	curBuf  []byte
+	curType    WALEntryType
+	curFlag    byte
+	curBuf     []byte
+	lastOffset int64 // offset after last successfully read entry
 
 	err error
 }
 
-func newWALReader(w *SegmentWAL, l log.Logger) *walReader {
+func newWALReader(files []*segmentFile, l log.Logger) *walReader {
 	if l == nil {
 		l = log.NewNopLogger()
 	}
 	return &walReader{
 		logger: l,
-		wal:    w,
+		files:  files,
 		buf:    make([]byte, 0, 128*4096),
-		crc32:  crc32.New(crc32.MakeTable(crc32.Castagnoli)),
+		crc32:  newCRC32(),
 	}
 }
 
@@ -534,29 +790,69 @@ func (r *walReader) Err() error {
 }
 
 func (r *walReader) Read(seriesf SeriesCB, samplesf SamplesCB, deletesf DeletesCB) error {
+	if seriesf == nil {
+		seriesf = func([]RefSeries) error { return nil }
+	}
+	if samplesf == nil {
+		samplesf = func([]RefSample) error { return nil }
+	}
+	if deletesf == nil {
+		deletesf = func([]Stone) error { return nil }
+	}
+
 	for r.next() {
 		et, flag, b := r.at()
 		// In decoding below we never return a walCorruptionErr for now.
 		// Those should generally be caught by entry decoding before.
 		switch et {
 		case WALEntrySeries:
-			s, err := r.decodeSeries(flag, b)
+			series, err := r.decodeSeries(flag, b)
 			if err != nil {
-				return err
+				return errors.Wrap(err, "decode series entry")
 			}
-			seriesf(s)
+			seriesf(series)
+
+			cf := r.current()
+
+			for _, s := range series {
+				if cf.minSeries > s.Ref {
+					cf.minSeries = s.Ref
+				}
+			}
+
 		case WALEntrySamples:
-			s, err := r.decodeSamples(flag, b)
+			samples, err := r.decodeSamples(flag, b)
 			if err != nil {
-				return err
+				return errors.Wrap(err, "decode samples entry")
 			}
-			samplesf(s)
+			samplesf(samples)
+
+			// Update the times for the WAL segment file.
+			cf := r.current()
+
+			for _, s := range samples {
+				if cf.maxTime < s.T {
+					cf.maxTime = s.T
+				}
+			}
+
 		case WALEntryDeletes:
-			s, err := r.decodeDeletes(flag, b)
+			stones, err := r.decodeDeletes(flag, b)
 			if err != nil {
-				return err
+				return errors.Wrap(err, "decode delete entry")
+			}
+			deletesf(stones)
+
+			// Update the times for the WAL segment file.
+			cf := r.current()
+
+			for _, s := range stones {
+				for _, iv := range s.intervals {
+					if cf.maxTime < iv.Maxt {
+						cf.maxTime = iv.Maxt
+					}
+				}
 			}
-			deletesf(s)
 		}
 	}
 
@@ -565,20 +861,17 @@ func (r *walReader) Read(seriesf SeriesCB, samplesf SamplesCB, deletesf DeletesC
 
 // nextEntry retrieves the next entry. It is also used as a testing hook.
 func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) {
-	if r.cur >= len(r.wal.files) {
+	if r.cur >= len(r.files) {
 		return 0, 0, nil, io.EOF
 	}
-	cf := r.wal.files[r.cur]
+	cf := r.current()
 
 	et, flag, b, err := r.entry(cf)
-	// If we reached the end of the reader, advance to the next one
-	// and close.
+	// If we reached the end of the reader, advance to the next one.
 	// Do not close on the last one as it will still be appended to.
-	if err == io.EOF && r.cur < len(r.wal.files)-1 {
-		// Current reader completed, close and move to the next one.
-		if err := cf.Close(); err != nil {
-			return 0, 0, nil, err
-		}
+	if err == io.EOF && r.cur < len(r.files)-1 {
+		// Current reader completed. Leave the file open for later reads
+		// for truncating.
 		r.cur++
 		return r.nextEntry()
 	}
@@ -592,15 +885,15 @@ func (r *walReader) at() (WALEntryType, byte, []byte) {
 
 // next decodes the next entry pair and returns true
 // if it was successful.
 func (r *walReader) next() bool {
-	if r.cur >= len(r.wal.files) {
+	if r.cur >= len(r.files) {
 		return false
 	}
-	cf := r.wal.files[r.cur]
+	cf := r.files[r.cur]
 
-	// Save position after last valid entry if we have to truncate the WAL.
-	lastOffset, err := cf.Seek(0, os.SEEK_CUR)
-	if err != nil {
-		r.err = err
+	// Remember the offset after the last correctly read entry. If the next one
+	// is corrupted, this is where we can safely truncate.
+	r.lastOffset, r.err = cf.Seek(0, os.SEEK_CUR)
+	if r.err != nil {
 		return false
 	}
@@ -609,7 +902,7 @@
 	// and close.
 	// Do not close on the last one as it will still be appended to.
 	if err == io.EOF {
-		if r.cur == len(r.wal.files)-1 {
+		if r.cur == len(r.files)-1 {
 			return false
 		}
 		// Current reader completed, close and move to the next one.
@@ -622,10 +915,6 @@
 	}
 	if err != nil {
 		r.err = err
-
-		if _, ok := err.(walCorruptionErr); ok {
-			r.err = r.truncate(lastOffset)
-		}
 		return false
 	}
@@ -635,37 +924,28 @@
 	return r.err == nil
 }
 
-func (r *walReader) current() *os.File {
-	return r.wal.files[r.cur]
-}
-
-// truncate the WAL after the last valid entry.
-func (r *walReader) truncate(lastOffset int64) error {
-	r.logger.Log("msg", "WAL corruption detected; truncating",
-		"err", r.err, "file", r.current().Name(), "pos", lastOffset)
-
-	// Close and delete all files after the current one.
-	for _, f := range r.wal.files[r.cur+1:] {
-		if err := f.Close(); err != nil {
-			return err
-		}
-		if err := os.Remove(f.Name()); err != nil {
-			return err
-		}
-	}
-	r.wal.files = r.wal.files[:r.cur+1]
-
-	// Seek the current file to the last valid offset where we continue writing from.
-	_, err := r.current().Seek(lastOffset, os.SEEK_SET)
-	return err
+func (r *walReader) current() *segmentFile {
+	return r.files[r.cur]
 }
 
 // walCorruptionErr is a type wrapper for errors that indicate WAL corruption
 // and trigger a truncation.
-type walCorruptionErr error
+type walCorruptionErr struct {
+	err        error
+	file       int
+	lastOffset int64
+}
 
-func walCorruptionErrf(s string, args ...interface{}) error {
-	return walCorruptionErr(errors.Errorf(s, args...))
+func (e walCorruptionErr) Error() string {
+	return fmt.Sprintf("%s <file: %d, last offset: %d>", e.err, e.file, e.lastOffset)
+}
+
+func (r *walReader) corruptionErr(s string, args ...interface{}) error {
+	return walCorruptionErr{
+		err:        errors.Errorf(s, args...),
+		file:       r.cur,
+		lastOffset: r.lastOffset,
	}
 }
 
 func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) {
@@ -676,7 +956,7 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) {
 	if n, err := tr.Read(b); err != nil {
 		return 0, 0, nil, err
 	} else if n != 6 {
-		return 0, 0, nil, walCorruptionErrf("invalid entry header size %d", n)
+		return 0, 0, nil, r.corruptionErr("invalid entry header size %d", n)
 	}
 
 	var (
@@ -689,7 +969,7 @@
 		return 0, 0, nil, io.EOF
 	}
 	if etype != WALEntrySeries && etype != WALEntrySamples && etype != WALEntryDeletes {
-		return 0, 0, nil, walCorruptionErrf("invalid entry type %d", etype)
+		return 0, 0, nil, r.corruptionErr("invalid entry type %d", etype)
 	}
 
 	if length > len(r.buf) {
@@ -700,107 +980,100 @@
 	if n, err := tr.Read(buf); err != nil {
 		return 0, 0, nil, err
 	} else if n != length {
-		return 0, 0, nil, walCorruptionErrf("invalid entry body size %d", n)
+		return 0, 0, nil, r.corruptionErr("invalid entry body size %d", n)
 	}
 
 	if n, err := cr.Read(b[:4]); err != nil {
 		return 0, 0, nil, err
 	} else if n != 4 {
-		return 0, 0, nil, walCorruptionErrf("invalid checksum length %d", n)
+		return 0, 0, nil, r.corruptionErr("invalid checksum length %d", n)
 	}
 	if exp, has := binary.BigEndian.Uint32(b[:4]), r.crc32.Sum32(); has != exp {
-		return 0, 0, nil, walCorruptionErrf("unexpected CRC32 checksum %x, want %x", has, exp)
+		return 0, 0, nil, r.corruptionErr("unexpected CRC32 checksum %x, want %x", has, exp)
 	}
 
 	return etype, flag, buf, nil
 }
 
-func (r *walReader) decodeSeries(flag byte, b []byte) ([]labels.Labels, error) {
-	series := []labels.Labels{}
-	for len(b) > 0 {
-		l, n := binary.Uvarint(b)
-		if n < 1 {
-			return nil, errors.Wrap(errInvalidSize, "number of labels")
-		}
-		b = b[n:]
-
-		lset := make(labels.Labels, l)
-
-		for i := 0; i < int(l); i++ {
-			nl, n := binary.Uvarint(b)
-			if n < 1 || len(b) < n+int(nl) {
-				return nil, errors.Wrap(errInvalidSize, "label name")
-			}
-			lset[i].Name = string(b[n : n+int(nl)])
-			b = b[n+int(nl):]
-
-			vl, n := binary.Uvarint(b)
-			if n < 1 || len(b) < n+int(vl) {
-				return nil, errors.Wrap(errInvalidSize, "label value")
-			}
-			lset[i].Value = string(b[n : n+int(vl)])
-			b = b[n+int(vl):]
-		}
-
-		series = append(series, lset)
+func (r *walReader) decodeSeries(flag byte, b []byte) ([]RefSeries, error) {
+	series := []RefSeries{}
+	dec := decbuf{b: b}
+
+	for len(dec.b) > 0 && dec.err() == nil {
+		ref := dec.be64()
+
+		lset := make(labels.Labels, dec.uvarint())
+
+		for i := range lset {
+			lset[i].Name = dec.uvarintStr()
+			lset[i].Value = dec.uvarintStr()
+		}
+		sort.Sort(lset)
+
+		series = append(series, RefSeries{
+			Ref:    ref,
+			Labels: lset,
+		})
+	}
+	if dec.err() != nil {
+		return nil, dec.err()
+	}
+	if len(dec.b) > 0 {
+		return series, errors.Errorf("unexpected %d bytes left in entry", len(dec.b))
 	}
 	return series, nil
 }
 
 func (r *walReader) decodeSamples(flag byte, b []byte) ([]RefSample, error) {
-	samples := 
[]RefSample{} - - if len(b) < 16 { - return nil, errors.Wrap(errInvalidSize, "header length") + if len(b) == 0 { + return nil, nil } + samples := []RefSample{} + dec := decbuf{b: b} + var ( - baseRef = binary.BigEndian.Uint64(b) - baseTime = int64(binary.BigEndian.Uint64(b[8:])) + baseRef = dec.be64() + baseTime = dec.be64int64() ) - b = b[16:] - for len(b) > 0 { - var smpl RefSample + for len(dec.b) > 0 && dec.err() == nil { + dref := dec.varint64() + dtime := dec.varint64() + val := dec.be64() - dref, n := binary.Varint(b) - if n < 1 { - return nil, errors.Wrap(errInvalidSize, "sample ref delta") - } - b = b[n:] + samples = append(samples, RefSample{ + Ref: uint64(int64(baseRef) + dref), + T: baseTime + dtime, + V: math.Float64frombits(val), + }) + } - smpl.Ref = uint64(int64(baseRef) + dref) - - dtime, n := binary.Varint(b) - if n < 1 { - return nil, errors.Wrap(errInvalidSize, "sample timestamp delta") - } - b = b[n:] - smpl.T = baseTime + dtime - - if len(b) < 8 { - return nil, errors.Wrapf(errInvalidSize, "sample value bits %d", len(b)) - } - smpl.V = float64(math.Float64frombits(binary.BigEndian.Uint64(b))) - b = b[8:] - - samples = append(samples, smpl) + if dec.err() != nil { + return nil, errors.Wrapf(dec.err(), "decode error after %d samples", len(samples)) + } + if len(dec.b) > 0 { + return samples, errors.Errorf("unexpected %d bytes left in entry", len(dec.b)) } return samples, nil } func (r *walReader) decodeDeletes(flag byte, b []byte) ([]Stone, error) { - db := &decbuf{b: b} - stones := []Stone{} + dec := &decbuf{b: b} + var stones []Stone - for db.len() > 0 { - var s Stone - s.ref = db.uvarint32() - s.intervals = intervals{{db.varint64(), db.varint64()}} - if db.err() != nil { - return nil, db.err() - } - - stones = append(stones, s) + for dec.len() > 0 && dec.err() == nil { + stones = append(stones, Stone{ + ref: dec.be64(), + intervals: Intervals{ + {Mint: dec.varint64(), Maxt: dec.varint64()}, + }, + }) + } + if dec.err() != nil { + return nil, dec.err() + } + if len(dec.b) > 0 { + return stones, errors.Errorf("unexpected %d bytes left in entry", len(dec.b)) } - return stones, nil } diff --git a/vendor/vendor.json b/vendor/vendor.json index ed23fd4e55..bb499c8b40 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -859,22 +859,22 @@ "revisionTime": "2016-04-11T19:08:41Z" }, { - "checksumSHA1": "WvgmP/a6PVjj33/h8L7XrNUmoQE=", + "checksumSHA1": "AoNkGFKIyLNi4a/QcO8p5D7xIXs=", "path": "github.com/prometheus/tsdb", - "revision": "c4ca881685ae1266a75caf57da46d8b6934213c0", - "revisionTime": "2017-08-18T07:54:27Z" + "revision": "0db4c227b72145418ad4c1fbda8fdb87bfe77a02", + "revisionTime": "2017-09-07T11:04:02Z" }, { "checksumSHA1": "Gua979gmISm4cJP/fR2hL8m5To8=", "path": "github.com/prometheus/tsdb/chunks", - "revision": "c4ca881685ae1266a75caf57da46d8b6934213c0", - "revisionTime": "2017-08-18T07:54:27Z" + "revision": "0db4c227b72145418ad4c1fbda8fdb87bfe77a02", + "revisionTime": "2017-09-07T11:04:02Z" }, { "checksumSHA1": "zhmlvc322RH1L3l9DaA9d/HVVWs=", "path": "github.com/prometheus/tsdb/labels", - "revision": "c4ca881685ae1266a75caf57da46d8b6934213c0", - "revisionTime": "2017-08-18T07:54:27Z" + "revision": "0db4c227b72145418ad4c1fbda8fdb87bfe77a02", + "revisionTime": "2017-09-07T11:04:02Z" }, { "checksumSHA1": "5SYLEhADhdBVZAGPVHWggQl7H8k=",