|
| 1 | +/* |
| 2 | + * Copyright 2021 Dgraph Labs, Inc. and Contributors |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package cmd |
| 18 | + |
| 19 | +import ( |
| 20 | + "bytes" |
| 21 | + "fmt" |
| 22 | + "os" |
| 23 | + "runtime/pprof" |
| 24 | + "sort" |
| 25 | + "testing" |
| 26 | + |
| 27 | + "github.com/spf13/cobra" |
| 28 | + |
| 29 | + "github.com/dgraph-io/badger/v3" |
| 30 | + "github.com/dgraph-io/badger/v3/options" |
| 31 | + "github.com/dgraph-io/badger/v3/table" |
| 32 | + "github.com/dgraph-io/badger/v3/y" |
| 33 | +) |
| 34 | + |
| 35 | +var pickBenchCmd = &cobra.Command{ |
| 36 | + Use: "picktable", |
| 37 | + Short: "Benchmark pick tables.", |
| 38 | + Long: `This command simulates pickTables used in iterators.`, |
| 39 | + RunE: pickTableBench, |
| 40 | +} |
| 41 | + |
| 42 | +var ( |
| 43 | + pickOpts = struct { |
| 44 | + readOnly bool |
| 45 | + sampleSize int |
| 46 | + cpuprofile string |
| 47 | + }{} |
| 48 | + keys [][]byte |
| 49 | + handler levelHandler |
| 50 | +) |
| 51 | + |
| 52 | +func init() { |
| 53 | + benchCmd.AddCommand(pickBenchCmd) |
| 54 | + pickBenchCmd.Flags().BoolVar( |
| 55 | + &pickOpts.readOnly, "read-only", true, "If true, DB will be opened in read only mode.") |
| 56 | + pickBenchCmd.Flags().IntVar( |
| 57 | + &pickOpts.sampleSize, "sample-size", 1000000, "Sample size of keys to be used for lookup.") |
| 58 | + pickBenchCmd.Flags().StringVar( |
| 59 | + &pickOpts.cpuprofile, "cpuprofile", "", "Write CPU profile to file.") |
| 60 | +} |
| 61 | + |
| 62 | +func pickTableBench(cmd *cobra.Command, args []string) error { |
| 63 | + opt := badger.DefaultOptions(sstDir). |
| 64 | + WithValueDir(vlogDir). |
| 65 | + WithReadOnly(pickOpts.readOnly) |
| 66 | + fmt.Printf("Opening badger with options = %+v\n", opt) |
| 67 | + db, err := badger.OpenManaged(opt) |
| 68 | + if err != nil { |
| 69 | + return y.Wrapf(err, "unable to open DB") |
| 70 | + } |
| 71 | + defer func() { |
| 72 | + y.Check(db.Close()) |
| 73 | + }() |
| 74 | + |
| 75 | + boundaries := getBoundaries(db) |
| 76 | + tables := genTables(boundaries) |
| 77 | + handler.init(tables) |
| 78 | + keys, err = getSampleKeys(db, pickOpts.sampleSize) |
| 79 | + y.Check(err) |
| 80 | + fmt.Println("Running benchmark...") |
| 81 | + fmt.Println("***** BenchmarkPickTables *****") |
| 82 | + fmt.Println(testing.Benchmark(BenchmarkPickTables)) |
| 83 | + fmt.Println("*******************************") |
| 84 | + return nil |
| 85 | +} |
| 86 | + |
| 87 | +func BenchmarkPickTables(b *testing.B) { |
| 88 | + if len(pickOpts.cpuprofile) > 0 { |
| 89 | + f, err := os.Create(pickOpts.cpuprofile) |
| 90 | + y.Check(err) |
| 91 | + err = pprof.StartCPUProfile(f) |
| 92 | + y.Check(err) |
| 93 | + defer pprof.StopCPUProfile() |
| 94 | + } |
| 95 | + b.ResetTimer() |
| 96 | + iopts := iteratorOptions{prefixIsKey: true} |
| 97 | + for i := 0; i < b.N; i++ { |
| 98 | + for _, key := range keys { |
| 99 | + iopts.Prefix = key |
| 100 | + _ = handler.pickTables(iopts) |
| 101 | + } |
| 102 | + } |
| 103 | +} |
| 104 | + |
// iteratorOptions is a trimmed-down stand-in for badger.IteratorOptions
// (iterator.go) carrying only the fields that pickTables consults.
type iteratorOptions struct {
	// prefixIsKey marks Prefix as a complete key rather than a key prefix.
	// If set, use the prefix for bloom filter lookup.
	prefixIsKey bool
	// Prefix restricts iteration to keys starting with these bytes.
	Prefix []byte
	// SinceTs restricts reads to data with version > SinceTs.
	SinceTs uint64
}
| 111 | + |
| 112 | +// See compareToPrefix in iterator.go |
| 113 | +func (opt *iteratorOptions) compareToPrefix(key []byte) int { |
| 114 | + // We should compare key without timestamp. For example key - a[TS] might be > "aa" prefix. |
| 115 | + key = y.ParseKey(key) |
| 116 | + if len(key) > len(opt.Prefix) { |
| 117 | + key = key[:len(opt.Prefix)] |
| 118 | + } |
| 119 | + return bytes.Compare(key, opt.Prefix) |
| 120 | +} |
| 121 | + |
| 122 | +// See levelHandler in level_handler.go |
| 123 | +type levelHandler struct { |
| 124 | + tables []*table.Table |
| 125 | +} |
| 126 | + |
| 127 | +func (s *levelHandler) init(tables []*table.Table) { |
| 128 | + fmt.Println("Initializing level handler...") |
| 129 | + s.tables = tables |
| 130 | +} |
| 131 | + |
| 132 | +// This implementation is based on the implementation in master branch. |
| 133 | +func (s *levelHandler) pickTables(opt iteratorOptions) []*table.Table { |
| 134 | + filterTables := func(tables []*table.Table) []*table.Table { |
| 135 | + if opt.SinceTs > 0 { |
| 136 | + tmp := tables[:0] |
| 137 | + for _, t := range tables { |
| 138 | + if t.MaxVersion() < opt.SinceTs { |
| 139 | + continue |
| 140 | + } |
| 141 | + tmp = append(tmp, t) |
| 142 | + } |
| 143 | + tables = tmp |
| 144 | + } |
| 145 | + return tables |
| 146 | + } |
| 147 | + |
| 148 | + all := s.tables |
| 149 | + if len(opt.Prefix) == 0 { |
| 150 | + out := make([]*table.Table, len(all)) |
| 151 | + copy(out, all) |
| 152 | + return filterTables(out) |
| 153 | + } |
| 154 | + sIdx := sort.Search(len(all), func(i int) bool { |
| 155 | + // table.Biggest >= opt.prefix |
| 156 | + // if opt.Prefix < table.Biggest, then surely it is not in any of the preceding tables. |
| 157 | + return opt.compareToPrefix(all[i].Biggest()) >= 0 |
| 158 | + }) |
| 159 | + if sIdx == len(all) { |
| 160 | + // Not found. |
| 161 | + return []*table.Table{} |
| 162 | + } |
| 163 | + |
| 164 | + filtered := all[sIdx:] |
| 165 | + if !opt.prefixIsKey { |
| 166 | + eIdx := sort.Search(len(filtered), func(i int) bool { |
| 167 | + return opt.compareToPrefix(filtered[i].Smallest()) > 0 |
| 168 | + }) |
| 169 | + out := make([]*table.Table, len(filtered[:eIdx])) |
| 170 | + copy(out, filtered[:eIdx]) |
| 171 | + return filterTables(out) |
| 172 | + } |
| 173 | + |
| 174 | + // opt.prefixIsKey == true. This code is optimizing for opt.prefixIsKey part. |
| 175 | + var out []*table.Table |
| 176 | + // hash := y.Hash(opt.Prefix) |
| 177 | + for _, t := range filtered { |
| 178 | + // When we encounter the first table whose smallest key is higher than opt.Prefix, we can |
| 179 | + // stop. This is an IMPORTANT optimization, just considering how often we call |
| 180 | + // NewKeyIterator. |
| 181 | + if opt.compareToPrefix(t.Smallest()) > 0 { |
| 182 | + // if table.Smallest > opt.Prefix, then this and all tables after this can be ignored. |
| 183 | + break |
| 184 | + } |
| 185 | + out = append(out, t) |
| 186 | + } |
| 187 | + return filterTables(out) |
| 188 | +} |
| 189 | + |
| 190 | +// Sorts the boundaries and creates mock table out of them. |
| 191 | +func genTables(boundaries [][]byte) []*table.Table { |
| 192 | + buildTable := func(k1, k2 []byte) *table.Table { |
| 193 | + opts := table.Options{ |
| 194 | + ChkMode: options.NoVerification, |
| 195 | + } |
| 196 | + b := table.NewTableBuilder(opts) |
| 197 | + defer b.Close() |
| 198 | + // Add one key so that we can open this table. |
| 199 | + b.Add(y.KeyWithTs(k1, 1), y.ValueStruct{}, 0) |
| 200 | + b.Add(y.KeyWithTs(k2, 1), y.ValueStruct{}, 0) |
| 201 | + tab, err := table.OpenInMemoryTable(b.Finish(), 0, &opts) |
| 202 | + y.Check(err) |
| 203 | + return tab |
| 204 | + } |
| 205 | + |
| 206 | + sort.Slice(boundaries, func(i, j int) bool { |
| 207 | + return bytes.Compare(boundaries[i], boundaries[j]) < 0 |
| 208 | + }) |
| 209 | + out := make([]*table.Table, 0, len(boundaries)) |
| 210 | + for i := range boundaries { |
| 211 | + var j int |
| 212 | + if i != 0 { |
| 213 | + j = i - 1 |
| 214 | + } |
| 215 | + out = append(out, buildTable(boundaries[i], boundaries[j])) |
| 216 | + } |
| 217 | + fmt.Printf("Created %d mock tables.\n", len(out)) |
| 218 | + return out |
| 219 | +} |
| 220 | + |
| 221 | +func getBoundaries(db *badger.DB) [][]byte { |
| 222 | + fmt.Println("Getting the table boundaries...") |
| 223 | + tables := db.Tables() |
| 224 | + out := make([][]byte, 0, 2*len(tables)) |
| 225 | + for _, t := range tables { |
| 226 | + out = append(out, t.Left, t.Right) |
| 227 | + } |
| 228 | + return out |
| 229 | +} |
0 commit comments