From 000e306f2fbb373bc37fa8e710553b072fac71a1 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Mon, 6 Jan 2020 14:06:11 +0000 Subject: [PATCH] Handle V1 indexes, some of which have unsorted posting offset tables. (#6564) Fixes #6535 Signed-off-by: Brian Brazil --- tsdb/block_test.go | 26 ++-- tsdb/index/index.go | 128 ++++++++++++++------ tsdb/testdata/index_format_v1/chunks/000001 | Bin 44 -> 1844 bytes tsdb/testdata/index_format_v1/index | Bin 238 -> 5407 bytes tsdb/testdata/index_format_v1/meta.json | 10 +- 5 files changed, 112 insertions(+), 52 deletions(-) diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 0f83ac5c04..ef19468a34 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -267,16 +267,20 @@ func TestBlockSize(t *testing.T) { } func TestReadIndexFormatV1(t *testing.T) { - /* The block here was produced at commit - 07ef80820ef1250db82f9544f3fcf7f0f63ccee0 with: - db, _ := Open("v1db", nil, nil, nil) - app := db.Appender() - app.Add(labels.FromStrings("foo", "bar"), 1, 2) - app.Add(labels.FromStrings("foo", "baz"), 3, 4) - app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block. - app.Commit() - db.compact() - db.Close() + /* The block here was produced at the commit + 706602daed1487f7849990678b4ece4599745905 used in 2.0.0 with: + db, _ := Open("v1db", nil, nil, nil) + app := db.Appender() + app.Add(labels.FromStrings("foo", "bar"), 1, 2) + app.Add(labels.FromStrings("foo", "baz"), 3, 4) + app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block. + // Make sure we've enough values for the lack of sorting of postings offsets to show up. + for i := 0; i < 100; i++ { + app.Add(labels.FromStrings("bar", strconv.FormatInt(int64(i), 10)), 0, 0) + } + app.Commit() + db.compact() + db.Close() */ blockDir := filepath.Join("testdata", "index_format_v1") @@ -290,7 +294,7 @@ func TestReadIndexFormatV1(t *testing.T) { q, err = NewBlockQuerier(block, 0, 1000) testutil.Ok(t, err) - testutil.Equals(t, query(t, q, labels.MustNewMatcher(labels.MatchNotRegexp, "foo", "^.$")), + testutil.Equals(t, query(t, q, labels.MustNewMatcher(labels.MatchNotRegexp, "foo", "^.?$")), map[string][]tsdbutil.Sample{ `{foo="bar"}`: []tsdbutil.Sample{sample{t: 1, v: 2}}, `{foo="baz"}`: []tsdbutil.Sample{sample{t: 3, v: 4}}, diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 18fa0360e0..dd1a0dd379 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -1024,6 +1024,8 @@ type Reader struct { // Map of LabelName to a list of some LabelValues's position in the offset table. // The first and last values for each name are always present. postings map[string][]postingOffset + // For the v1 format, labelname -> labelvalue -> offset. + postingsV1 map[string]map[string]uint64 symbols *Symbols nameSymbols map[uint32]string // Cache of the label name symbol lookups, @@ -1113,45 +1115,64 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { return nil, errors.Wrap(err, "read symbols") } - var lastKey []string - lastOff := 0 - valueCount := 0 - // For the postings offset table we keep every label name but only every nth - // label value (plus the first and last one), to save memory. - if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, _ uint64, off int) error { - if len(key) != 2 { - return errors.Errorf("unexpected key length for posting table %d", len(key)) - } - if _, ok := r.postings[key[0]]; !ok { - // Next label name. - r.postings[key[0]] = []postingOffset{} - if lastKey != nil { - // Always include last value for each label name. - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) + if r.version == FormatV1 { + // Earlier V1 formats don't have a sorted postings offset table, so + // load the whole offset table into memory. + r.postingsV1 = map[string]map[string]uint64{} + if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, off uint64, _ int) error { + if len(key) != 2 { + return errors.Errorf("unexpected key length for posting table %d", len(key)) } - lastKey = nil - valueCount = 0 + if _, ok := r.postingsV1[key[0]]; !ok { + r.postingsV1[key[0]] = map[string]uint64{} + r.postings[key[0]] = nil // Used to get a list of labelnames in places. + } + r.postingsV1[key[0]][key[1]] = off + return nil + }); err != nil { + return nil, errors.Wrap(err, "read postings table") } - if valueCount%32 == 0 { - r.postings[key[0]] = append(r.postings[key[0]], postingOffset{value: key[1], off: off}) - lastKey = nil - } else { - lastKey = key - lastOff = off + } else { + var lastKey []string + lastOff := 0 + valueCount := 0 + // For the postings offset table we keep every label name but only every nth + // label value (plus the first and last one), to save memory. + if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, _ uint64, off int) error { + if len(key) != 2 { + return errors.Errorf("unexpected key length for posting table %d", len(key)) + } + if _, ok := r.postings[key[0]]; !ok { + // Next label name. + r.postings[key[0]] = []postingOffset{} + if lastKey != nil { + // Always include last value for each label name. + r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) + } + lastKey = nil + valueCount = 0 + } + if valueCount%32 == 0 { + r.postings[key[0]] = append(r.postings[key[0]], postingOffset{value: key[1], off: off}) + lastKey = nil + } else { + lastKey = key + lastOff = off + } + valueCount++ + return nil + }); err != nil { + return nil, errors.Wrap(err, "read postings table") + } + if lastKey != nil { + r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) + } + // Trim any extra space in the slices. + for k, v := range r.postings { + l := make([]postingOffset, len(v)) + copy(l, v) + r.postings[k] = l } - valueCount++ - return nil - }); err != nil { - return nil, errors.Wrap(err, "read postings table") - } - if lastKey != nil { - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) - } - // Trim any extra space in the slices. - for k, v := range r.postings { - l := make([]postingOffset, len(v)) - copy(l, v) - r.postings[k] = l } r.nameSymbols = make(map[uint32]string, len(r.postings)) @@ -1408,6 +1429,19 @@ func (r *Reader) LabelValues(names ...string) (StringTuples, error) { if len(names) != 1 { return nil, errors.Errorf("only one label name supported") } + if r.version == FormatV1 { + e, ok := r.postingsV1[names[0]] + if !ok { + return emptyStringTuples{}, nil + } + values := make([]string, 0, len(e)) + for k := range e { + values = append(values, k) + } + sort.Strings(values) + return NewStringTuples(values, 1) + + } e, ok := r.postings[names[0]] if !ok { return emptyStringTuples{}, nil @@ -1467,6 +1501,28 @@ func (r *Reader) Series(id uint64, lbls *labels.Labels, chks *[]chunks.Meta) err } func (r *Reader) Postings(name string, values ...string) (Postings, error) { + if r.version == FormatV1 { + e, ok := r.postingsV1[name] + if !ok { + return EmptyPostings(), nil + } + res := make([]Postings, 0, len(values)) + for _, v := range values { + postingsOff, ok := e[v] + if !ok { + continue + } + // Read from the postings table. + d := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) + _, p, err := r.dec.Postings(d.Get()) + if err != nil { + return nil, errors.Wrap(err, "decode postings") + } + res = append(res, p) + } + return Merge(res...), nil + } + e, ok := r.postings[name] if !ok { return EmptyPostings(), nil diff --git a/tsdb/testdata/index_format_v1/chunks/000001 b/tsdb/testdata/index_format_v1/chunks/000001 index 1f2250b865ef7234a1bcc92dee7f76e033aadc03..ac544dbbef997224e7b69248bab6e0b945bbc7a4 100644 GIT binary patch literal 1844 zcmZqO>u{Hmfq{XCkpVqT-kXDdqcr+Q)Q+qOoeJp)tPEA)nuwAXiW1lO=6nN zG=*s@)6k+b&0w0zG)vH#?qj;dbeZW2(^aNxOh==^bd%|pU@*hS42c;sGZbd1%+Q#j zGXw1=Gc3VmrjMBtGi7Ef%v71FF;i!z!Az7a!D5z=SrW5kW+}{4nWZsHXO_V%lUbH~ z+}L-(9CzH?Z@Z^WnE3G%Tl$9z*(?%s7+RRWD1@XgBzn+)ZhlWvAJTcj^#LkLBS^2E zC7pMZG=U86I%Ky%5)BzwmitORNlVDAxyrn-Fh~p}NjTQyUwg>f!P0~t8>+^Tb%5;< zyn|PcC+i5y6ZSVgKaQ+3?36IGVEP2IZm?3JAm*GwCc*9qEzj=wflP%ZvQK_YC?hjq zYuNUxCBwyU1T$!5bgdgfJ} z2ql{X+v|CvTiblHd9V|n&^g_s!eI#lw8&$&|L9xNg``(KmpeUmmo$mA#xwWW*wLhm zNtb#z_j{%pX$onEx9skwWYT4%UwM1SH#tnYg7mmonH_eBbT#RDZ$+ave$rIZO7GRg zb8AW0kv{as=L>nH>q%2WUOaPPA!!EbhLCsCn;#_IM7lfV`ID`uk!~eD8q!E_(2{gJ z>A8^lcOTCp%_6M``KE>HBF!eP4)KSyeT8%n>5|axX=8Vg?j=nRmE)hPJ^29Xm!a1` z`beW{F6oib==&vqkRBvG6WV&!uEwNCNH2y?pOUzY^cd;=&{H>jIix2@lf!Z^w#X(u zNxClVcGmJ}(gM<~u*Rple??kIdMIpP?9xw2Pm}%_7MH%U_VUk>mW36}nskz?7fA1h zwf!)!Eom`nQh29758osGg)}X^hE1JKT1vVj{Oj-A#gdkj=7kUU-_IbeAUze%j7L9` zULh?FFV6mc0cjQKo$$H^H*5FCP13|VtJL(`OSnzCrcUSfCu%p+UDC`tuMW*VO|N}G znp@}Ih2|Bc4@nE^WGq`!yOI7P{k6`PtB)R2HGx5I)#?4w(@CTYNf$)y&-!&FX%gw` zh%PUr-X~p5x;3I%QuXtsDWo|Otv`IHc72zT=0`j`u6!I-SCE!OoT@IGK)RarM#Nk7 z8rCOGB~6GNVGTV&x{h>ZqM7oL8AK9b$+HTUV zq{Wd}zCKpl)ppXV$W^y~*hSSW(!V47uDXP-4zO${FxT>^Ya7!I9rOgxfj*!w=nn>hm%$J) z3=9V&z$h>Vj0bUG5||3!2Jv7Pm<{HDc_0BS1W8~qNCC^h3a}caf^}d$$N-zbR;(rvE;tB|fMehUI0*_sAvg`rfeWA*`~pfrIj8_vKoz(NZiBnv0eA@h3QViu z#?cGHKm@1@>Vrn035W(QK@4aII)IL#a~H*!ihk+=S|5f(H=yV{Kkf$)4Ya#^$82ay zf0w&h9)9*R`nFvdeE-nr3mg|0yB>@jcx zzrEnNd+p@oLuv2WLKkRvb$6$6xQ=GGJhrFCAy4 zGX}nJ#&PeY;|q?{y7BWpw<{a@A35%I{c=Ag==xdTyIY&2J9Xw-HTSwd$k$EQt;>!J zv)RdaX>aiUg~_`P|6Rv%mif&M*B6e9`=^I={Mm7DtVis=D#(vTKK(|}^$O>QUg^`o zsf+cSCRML-+>`u!uYTPf_rBlZK*D*)-Qm}Ij9L`rKgHD~&{+9+S<1+h&Bg@z{t*BB zv`=~6t_U`+1O0Bhm?5G(;}0s3*{esE(=+&=+~$Nh)PB{G10B*p+dm&9J+B`^|9 z0doMJ10vpE#BBg$5x)cHKsmrQdGK8G)C1Trp01!Lz}$M?1ehDo2Ot@wf~{a5!1Z{} zf--R1=$y>&q=&;|4Wv0wz41l|YO&)zj)3)l;=zFy3U_jhm$Jib3UdnP&x1+`=P z^W}2%Z#%nDqCUJA4Qpw;c&D;r6UJq` zFslEWvqKygx24F_Fz0rinH~GqVE$|uvR+*E?30drgywU$Ee-On6V5fB73BM--|PLl z<7`IR#jmjzc7LAZ*S(*;=D2swq{pe(92dJKw#3%^=Y03ntT#K-{|bD*ggg6Ajli1N z&N>yfi^Y1{E+m#cxS~7m-JAKrq@#|r7N^Eu!hK+$&uY31d1;#Cp49A_dGU_3aScC? z!u?~P?;bsGVLhz5?cU!Dc5Mq9pZkw7rB|>&|Eb^VnpQs6anIR~D|PG#yDqZk$pt~L z7dEVo^#+~K<_}8u2JH_qt?X#iw$AIlt;YU%7x#hPpKx)ZG-sIO-X0mRg}>{#(C6On zok8!h;uKbsxXG!D=WaaAALO{ZL(C2-8`jEc6zR@c`33upUsQjBz z1>ZgR>j#o<76p6e+n;pg&1Yvy+s`lOpHvq-*{_ z609QLWEqt_NmeyanwH!+(4AqX@np%FJSkQVPnzVn+k8cKQYv{;rE2>;U-A?Da+NeY znVCE(S`JT=;SVH1E#gh8QpuB{RP&^n$xZn1q%@vXBQuZ$J%=|*s^3m#5l@;`$&;y9 z+ih}k)7tI?Gc6GCHBqLWj2xaUpPwgPFS4szWmAR^wYK`wTLcEC7+E~YMjlVPRKSy= zmhhxnRd&^?;YsqPv<&p3tLZ$+S{6@|o)<`hQNWuFxr8UvSH+WT)!1!PN~=J3mYU9! zW@PcCYI!`#z5+X`B|J$=6;GC4V^@`w*8C#Pbe;?=iznI4<4IKuc(SY#o;0NjDR9rc zQ^OlgIVC30p{AzuBxzZJBq({jNmmMZGK~^ml&kC}tA;oEQrhsz^QGI#$l^)$<+XV* mZ|8J80O-dPIMBZx^b=D19LJ}Mf`Etm1IKz52Y-0J(fN*1>0|SEq5Hm9;B^Cj36?0mCzVvtbJa$G=UPdMc4sVkHCLmjok&QvBuYYeSO05M3dth9gFEF`|>8?z1*AfF2) z&j_;DLfOS}3y>!R#LPgW1DHUfAeW`USXCu;zg{i?%7Xwagkk{M1_GuKilGHc2SDj* JGkz-^2LQA1BPsv@ diff --git a/tsdb/testdata/index_format_v1/meta.json b/tsdb/testdata/index_format_v1/meta.json index 62347db26c..d99ae6b49c 100644 --- a/tsdb/testdata/index_format_v1/meta.json +++ b/tsdb/testdata/index_format_v1/meta.json @@ -1,17 +1,17 @@ { "version": 1, - "ulid": "01DVZX4CHY2EGZ6JQVS80AB9CF", + "ulid": "01DXXFZDYD1MQW6079WK0K6EDQ", "minTime": 0, "maxTime": 7200000, "stats": { - "numSamples": 2, - "numSeries": 2, - "numChunks": 2 + "numSamples": 102, + "numSeries": 102, + "numChunks": 102 }, "compaction": { "level": 1, "sources": [ - "01DVZX4CHY2EGZ6JQVS80AB9CF" + "01DXXFZDYD1MQW6079WK0K6EDQ" ] } }