Skip to content

Commit 4a3b224

Browse files
fix(bench): bring in benchmark fixes from main (#1863)
Cherry-pick of #1699 and #1741.
1 parent 1dce1d4 commit 4a3b224

File tree

7 files changed

+242
-11
lines changed

7 files changed

+242
-11
lines changed

badger/cmd/bench.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ import (
2323
var benchCmd = &cobra.Command{
2424
Use: "benchmark",
2525
Short: "Benchmark Badger database.",
26-
Long: `This command will benchmark Badger for different usecases. Currently only read benchmark
27-
is supported. Useful for testing and performance analysis.`,
26+
Long: `This command will benchmark Badger for different usecases.
27+
Useful for testing and performance analysis.`,
2828
}
2929

3030
func init() {

badger/cmd/pick_table_bench.go

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
/*
2+
* Copyright 2021 Dgraph Labs, Inc. and Contributors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package cmd
18+
19+
import (
20+
"bytes"
21+
"fmt"
22+
"os"
23+
"runtime/pprof"
24+
"sort"
25+
"testing"
26+
27+
"github.com/spf13/cobra"
28+
29+
"github.com/dgraph-io/badger/v3"
30+
"github.com/dgraph-io/badger/v3/options"
31+
"github.com/dgraph-io/badger/v3/table"
32+
"github.com/dgraph-io/badger/v3/y"
33+
)
34+
35+
// pickBenchCmd is the "picktable" subcommand registered under benchCmd.
// It benchmarks the table-picking routine used by iterators; see
// pickTableBench for the actual run logic.
var pickBenchCmd = &cobra.Command{
	Use:   "picktable",
	Short: "Benchmark pick tables.",
	Long:  `This command simulates pickTables used in iterators.`,
	RunE:  pickTableBench,
}
41+
42+
// Package-level state shared between the command handler (pickTableBench)
// and the benchmark body (BenchmarkPickTables).
var (
	// pickOpts collects the flag values of the picktable subcommand.
	pickOpts = struct {
		readOnly   bool   // open the DB in read-only mode
		sampleSize int    // number of keys to sample for lookups
		cpuprofile string // optional path to write a CPU profile to
	}{}
	keys    [][]byte     // sampled keys that BenchmarkPickTables looks up
	handler levelHandler // mock level handler holding the generated tables
)
51+
52+
// init registers the picktable subcommand under benchCmd and wires up its
// command-line flags into pickOpts.
func init() {
	benchCmd.AddCommand(pickBenchCmd)
	pickBenchCmd.Flags().BoolVar(
		&pickOpts.readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
	pickBenchCmd.Flags().IntVar(
		&pickOpts.sampleSize, "sample-size", 1000000, "Sample size of keys to be used for lookup.")
	pickBenchCmd.Flags().StringVar(
		&pickOpts.cpuprofile, "cpuprofile", "", "Write CPU profile to file.")
}
61+
62+
// pickTableBench is the RunE handler for the picktable subcommand. It opens
// the managed DB (read-only by default), builds mock tables from the DB's
// real table boundaries, samples keys for lookup, and then runs
// BenchmarkPickTables via testing.Benchmark, printing the result.
func pickTableBench(cmd *cobra.Command, args []string) error {
	// sstDir and vlogDir are package-level flags defined elsewhere in cmd.
	opt := badger.DefaultOptions(sstDir).
		WithValueDir(vlogDir).
		WithReadOnly(pickOpts.readOnly)
	fmt.Printf("Opening badger with options = %+v\n", opt)
	db, err := badger.OpenManaged(opt)
	if err != nil {
		return y.Wrapf(err, "unable to open DB")
	}
	defer func() {
		y.Check(db.Close())
	}()

	boundaries := getBoundaries(db)
	tables := genTables(boundaries)
	handler.init(tables)
	// Fills the package-level `keys` consumed by BenchmarkPickTables.
	keys, err = getSampleKeys(db, pickOpts.sampleSize)
	y.Check(err)
	fmt.Println("Running benchmark...")
	fmt.Println("***** BenchmarkPickTables *****")
	fmt.Println(testing.Benchmark(BenchmarkPickTables))
	fmt.Println("*******************************")
	return nil
}
86+
87+
// BenchmarkPickTables runs handler.pickTables once per sampled key per
// benchmark iteration, using prefix-is-key lookups. If --cpuprofile was
// given, a CPU profile covering the benchmark run is written to that file.
func BenchmarkPickTables(b *testing.B) {
	if len(pickOpts.cpuprofile) > 0 {
		f, err := os.Create(pickOpts.cpuprofile)
		y.Check(err)
		err = pprof.StartCPUProfile(f)
		y.Check(err)
		defer pprof.StopCPUProfile()
	}
	// Exclude the profile setup above from the measured time.
	b.ResetTimer()
	iopts := iteratorOptions{prefixIsKey: true}
	for i := 0; i < b.N; i++ {
		for _, key := range keys {
			iopts.Prefix = key
			_ = handler.pickTables(iopts)
		}
	}
}
104+
105+
// See badger.IteratorOptions (iterator.go)
// iteratorOptions is a trimmed-down local copy of badger.IteratorOptions,
// carrying only the fields that pickTables consults.
type iteratorOptions struct {
	prefixIsKey bool   // If set, use the prefix for bloom filter lookup.
	Prefix      []byte // Only iterate over this given prefix.
	SinceTs     uint64 // Only read data that has version > SinceTs.
}
111+
112+
// See compareToPrefix in iterator.go
113+
func (opt *iteratorOptions) compareToPrefix(key []byte) int {
114+
// We should compare key without timestamp. For example key - a[TS] might be > "aa" prefix.
115+
key = y.ParseKey(key)
116+
if len(key) > len(opt.Prefix) {
117+
key = key[:len(opt.Prefix)]
118+
}
119+
return bytes.Compare(key, opt.Prefix)
120+
}
121+
122+
// See levelHandler in level_handler.go
// levelHandler is a minimal stand-in for badger's levelHandler: it only holds
// the ordered set of tables that pickTables searches.
type levelHandler struct {
	tables []*table.Table
}
126+
127+
func (s *levelHandler) init(tables []*table.Table) {
128+
fmt.Println("Initializing level handler...")
129+
s.tables = tables
130+
}
131+
132+
// This implementation is based on the implementation in master branch.
// pickTables returns the subset of s.tables that could contain keys matching
// opt.Prefix (all tables when the prefix is empty), dropping tables whose
// MaxVersion is below opt.SinceTs.
func (s *levelHandler) pickTables(opt iteratorOptions) []*table.Table {
	// filterTables applies the SinceTs version filter in place over the
	// passed slice and returns it.
	filterTables := func(tables []*table.Table) []*table.Table {
		if opt.SinceTs > 0 {
			// Reuse the backing array: compact the surviving tables to the front.
			tmp := tables[:0]
			for _, t := range tables {
				if t.MaxVersion() < opt.SinceTs {
					continue
				}
				tmp = append(tmp, t)
			}
			tables = tmp
		}
		return tables
	}

	all := s.tables
	// No prefix: every table is a candidate. Copy so filterTables' in-place
	// compaction cannot mutate s.tables.
	if len(opt.Prefix) == 0 {
		out := make([]*table.Table, len(all))
		copy(out, all)
		return filterTables(out)
	}
	// Binary search for the first table whose biggest key reaches the prefix.
	sIdx := sort.Search(len(all), func(i int) bool {
		// table.Biggest >= opt.prefix
		// if opt.Prefix < table.Biggest, then surely it is not in any of the preceding tables.
		return opt.compareToPrefix(all[i].Biggest()) >= 0
	})
	if sIdx == len(all) {
		// Not found.
		return []*table.Table{}
	}

	filtered := all[sIdx:]
	if !opt.prefixIsKey {
		// Range scan: also binary-search for the first table that starts
		// strictly past the prefix, and take everything in between (copied,
		// again to protect s.tables from filterTables).
		eIdx := sort.Search(len(filtered), func(i int) bool {
			return opt.compareToPrefix(filtered[i].Smallest()) > 0
		})
		out := make([]*table.Table, len(filtered[:eIdx]))
		copy(out, filtered[:eIdx])
		return filterTables(out)
	}

	// opt.prefixIsKey == true. This code is optimizing for opt.prefixIsKey part.
	var out []*table.Table
	// hash := y.Hash(opt.Prefix)
	for _, t := range filtered {
		// When we encounter the first table whose smallest key is higher than opt.Prefix, we can
		// stop. This is an IMPORTANT optimization, just considering how often we call
		// NewKeyIterator.
		if opt.compareToPrefix(t.Smallest()) > 0 {
			// if table.Smallest > opt.Prefix, then this and all tables after this can be ignored.
			break
		}
		out = append(out, t)
	}
	return filterTables(out)
}
189+
190+
// Sorts the boundaries and creates mock table out of them.
// genTables builds one in-memory mock table per boundary, each holding two
// keys (the boundary itself plus its sorted predecessor), so that the tables
// span the same key ranges as the real DB's tables.
func genTables(boundaries [][]byte) []*table.Table {
	// buildTable creates a two-key in-memory table containing k1 and k2
	// (both at timestamp 1), with checksum verification disabled.
	buildTable := func(k1, k2 []byte) *table.Table {
		opts := table.Options{
			ChkMode: options.NoVerification,
		}
		b := table.NewTableBuilder(opts)
		defer b.Close()
		// Add one key so that we can open this table.
		b.Add(y.KeyWithTs(k1, 1), y.ValueStruct{}, 0)
		b.Add(y.KeyWithTs(k2, 1), y.ValueStruct{}, 0)
		tab, err := table.OpenInMemoryTable(b.Finish(), 0, &opts)
		y.Check(err)
		return tab
	}

	sort.Slice(boundaries, func(i, j int) bool {
		return bytes.Compare(boundaries[i], boundaries[j]) < 0
	})
	out := make([]*table.Table, 0, len(boundaries))
	for i := range boundaries {
		// j is the previous boundary (or i itself for the first entry).
		var j int
		if i != 0 {
			j = i - 1
		}
		// NOTE(review): boundaries is sorted ascending, so boundaries[j] <=
		// boundaries[i]; this passes the LARGER key as k1, meaning buildTable
		// adds keys in non-increasing order. Confirm the table builder accepts
		// that, or whether the arguments should be (boundaries[j], boundaries[i]).
		out = append(out, buildTable(boundaries[i], boundaries[j]))
	}
	fmt.Printf("Created %d mock tables.\n", len(out))
	return out
}
220+
221+
func getBoundaries(db *badger.DB) [][]byte {
222+
fmt.Println("Getting the table boundaries...")
223+
tables := db.Tables()
224+
out := make([][]byte, 0, 2*len(tables))
225+
for _, t := range tables {
226+
out = append(out, t.Left, t.Right)
227+
}
228+
return out
229+
}

badger/cmd/read_bench.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
170170
err := db.View(func(txn *badger.Txn) error {
171171
iopt := badger.DefaultIteratorOptions
172172
iopt.AllVersions = true
173+
iopt.PrefetchValues = false
173174
it := txn.NewKeyIterator(key, iopt)
174175
defer it.Close()
175176

@@ -189,7 +190,7 @@ func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
189190
}
190191

191192
// getSampleKeys uses stream framework internally, to get keys in random order.
192-
func getSampleKeys(db *badger.DB) ([][]byte, error) {
193+
func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error) {
193194
var keys [][]byte
194195
count := 0
195196
stream := db.NewStreamAt(math.MaxUint64)
@@ -218,7 +219,7 @@ func getSampleKeys(db *badger.DB) ([][]byte, error) {
218219
}
219220
keys = append(keys, kv.Key)
220221
count++
221-
if count >= ro.sampleSize {
222+
if count >= sampleSize {
222223
cancel()
223224
return errStop
224225
}

badger/cmd/write_bench.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func writeRandom(db *badger.DB, num uint64) error {
169169

170170
func readTest(db *badger.DB, dur time.Duration) {
171171
now := time.Now()
172-
keys, err := getSampleKeys(db)
172+
keys, err := getSampleKeys(db, ro.sampleSize)
173173
if err != nil {
174174
panic(err)
175175
}

manifest_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func buildTable(t *testing.T, keyValues [][]string, bopts table.Options) *table.
127127
defer b.Close()
128128
// TODO: Add test for file garbage collection here. No files should be left after the tests here.
129129

130-
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
130+
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
131131

132132
sort.Slice(keyValues, func(i, j int) bool {
133133
return keyValues[i][0] < keyValues[j][0]

table/table_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ func TestTableBigValues(t *testing.T) {
653653
builder.Add(key, vs, 0)
654654
}
655655

656-
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
656+
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
657657
tbl, err := CreateTable(filename, builder)
658658
require.NoError(t, err, "unable to open table")
659659
defer func() { require.NoError(t, tbl.DecrRef()) }()
@@ -755,7 +755,7 @@ func BenchmarkReadMerged(b *testing.B) {
755755
require.NoError(b, err)
756756

757757
for i := 0; i < m; i++ {
758-
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
758+
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
759759
opts := Options{Compression: options.ZSTD, BlockSize: 4 * 1024, BloomFalsePositive: 0.01}
760760
opts.BlockCache = cache
761761
builder := NewTableBuilder(opts)
@@ -849,7 +849,7 @@ func getTableForBenchmarks(b *testing.B, count int, cache *ristretto.Cache) *Tab
849849
opts.BlockCache = cache
850850
builder := NewTableBuilder(opts)
851851
defer builder.Close()
852-
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
852+
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Uint32())
853853
for i := 0; i < count; i++ {
854854
k := fmt.Sprintf("%016x", i)
855855
v := fmt.Sprintf("%d", i)

value_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -969,8 +969,9 @@ func BenchmarkReadWrite(b *testing.B) {
969969
dir, err := os.MkdirTemp("", "vlog-benchmark")
970970
y.Check(err)
971971
defer removeDir(dir)
972-
973-
db, err := Open(getTestOptions(dir))
972+
opts := getTestOptions(dir)
973+
opts.ValueThreshold = 0
974+
db, err := Open(opts)
974975
y.Check(err)
975976

976977
vl := &db.vlog

0 commit comments

Comments
 (0)