From 5c8e529c909050d75574cec25fe4cc6b7e4a72f8 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Thu, 8 May 2025 17:41:24 +0800 Subject: [PATCH 1/4] feat: add ChecksumAlgorithm option to decides which algorithm calculate checksums. --- badger/cmd/info.go | 19 ++++++++++++++++++- options.go | 16 ++++++++++++++++ table/builder.go | 13 ++++++------- table/table.go | 3 +++ 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/badger/cmd/info.go b/badger/cmd/info.go index c8878ff73..6dfbacd78 100644 --- a/badger/cmd/info.go +++ b/badger/cmd/info.go @@ -20,6 +20,7 @@ import ( "bytes" "encoding/hex" "fmt" + "github.com/dgraph-io/badger/v4/pb" "io/fs" "os" "path/filepath" @@ -52,6 +53,7 @@ type flagOptions struct { checksumVerificationMode string discard bool externalMagicVersion uint16 + checksumAlgorithm string } var ( @@ -81,6 +83,7 @@ func init() { infoCmd.Flags().StringVar(&opt.encryptionKey, "enc-key", "", "Use the provided encryption key") infoCmd.Flags().StringVar(&opt.checksumVerificationMode, "cv-mode", "none", "[none, table, block, tableAndBlock] Specifies when the db should verify checksum for SST.") + infoCmd.Flags().StringVar(&opt.checksumAlgorithm, "ct", "crc32c", "[crc32c,xxhash64] Specifies the checksum algorithm for SST.") infoCmd.Flags().BoolVar(&opt.discard, "discard", false, "Parse and print DISCARD file from value logs.") infoCmd.Flags().Uint16Var(&opt.externalMagicVersion, "external-magic", 0, @@ -101,6 +104,7 @@ to the Dgraph team. func handleInfo(cmd *cobra.Command, args []string) error { cvMode := checksumVerificationMode(opt.checksumVerificationMode) + ct := checksumAlgorithm(opt.checksumAlgorithm) bopt := badger.DefaultOptions(sstDir). WithValueDir(vlogDir). WithReadOnly(opt.readOnly). @@ -108,7 +112,8 @@ func handleInfo(cmd *cobra.Command, args []string) error { WithIndexCacheSize(200 << 20). WithEncryptionKey([]byte(opt.encryptionKey)). WithChecksumVerificationMode(cvMode). - WithExternalMagic(opt.externalMagicVersion) + WithExternalMagic(opt.externalMagicVersion). + WithChecksumAlgorithm(ct) if opt.discard { ds, err := badger.InitDiscardStats(bopt) @@ -522,6 +527,18 @@ func pluralFiles(count int) string { return "files" } +func checksumAlgorithm(ct string) pb.Checksum_Algorithm { + switch ct { + case "crc32c": + return pb.Checksum_CRC32C + case "xxhash64": + return pb.Checksum_XXHash64 + default: + fmt.Printf("Invalid checksum algorithm : %s\n", ct) + os.Exit(1) + } + return pb.Checksum_CRC32C +} func checksumVerificationMode(cvMode string) options.ChecksumVerificationMode { switch cvMode { case "none": diff --git a/options.go b/options.go index 06ec2b6be..f3998678d 100644 --- a/options.go +++ b/options.go @@ -18,6 +18,7 @@ package badger import ( "fmt" + "github.com/dgraph-io/badger/v4/pb" "os" "reflect" "strconv" @@ -106,6 +107,9 @@ type Options struct { // ChecksumVerificationMode decides when db should verify checksums for SSTable blocks. ChecksumVerificationMode options.ChecksumVerificationMode + //ChecksumAlgorithm decides which algorithm calculate checksums + ChecksumAlgorithm pb.Checksum_Algorithm + // DetectConflicts determines whether the transactions would be checked for // conflicts. The transactions can be processed at a higher rate when // conflict detection is disabled. @@ -187,6 +191,7 @@ func DefaultOptions(path string) Options { EncryptionKeyRotationDuration: 10 * 24 * time.Hour, // Default 10 days. DetectConflicts: true, NamespaceOffset: -1, + ChecksumAlgorithm: pb.Checksum_CRC32C, } } @@ -201,6 +206,7 @@ func buildTableOptions(db *DB) table.Options { BlockSize: opt.BlockSize, BloomFalsePositive: opt.BloomFalsePositive, ChkMode: opt.ChecksumVerificationMode, + ChkAlgo: opt.ChecksumAlgorithm, Compression: opt.Compression, ZSTDCompressionLevel: opt.ZSTDCompressionLevel, BlockCache: db.blockCache, @@ -682,6 +688,16 @@ func (opt Options) WithChecksumVerificationMode(cvMode options.ChecksumVerificat return opt } +// WithChecksumAlgorithm return a new Options value with ChecksumAlgorithm set to the given value +// +// ChecksumAlgorithm decides which algorithm calculate checksums. +// +// The default value of ChecksumAlgorithm is pb.Checksum_CRC32C. +func (opt Options) WithChecksumAlgorithm(ct pb.Checksum_Algorithm) Options { + opt.ChecksumAlgorithm = ct + return opt +} + // WithBlockCacheSize returns a new Options value with BlockCacheSize set to the given value. // // This value specifies how much data cache should hold in memory. A small size diff --git a/table/builder.go b/table/builder.go index 7d439f338..484db2d7b 100644 --- a/table/builder.go +++ b/table/builder.go @@ -278,7 +278,7 @@ func (b *Builder) finishBlock() { b.append(y.U32SliceToBytes(b.curBlock.entryOffsets)) b.append(y.U32ToBytes(uint32(len(b.curBlock.entryOffsets)))) - checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end]) + checksum := b.calculateChecksum(b.curBlock.data[:b.curBlock.end], b.opts.ChkAlgo) // Append the block checksum and its length. b.append(checksum) @@ -454,7 +454,7 @@ func (b *Builder) Done() buildData { index, err = b.encrypt(index) y.Check(err) } - checksum := b.calculateChecksum(index) + checksum := b.calculateChecksum(index, b.opts.ChkAlgo) bd.index = index bd.checksum = checksum @@ -462,19 +462,18 @@ func (b *Builder) Done() buildData { return bd } -func (b *Builder) calculateChecksum(data []byte) []byte { +func (b *Builder) calculateChecksum(data []byte, ct pb.Checksum_Algorithm) []byte { // Build checksum for the index. checksum := pb.Checksum{ - // TODO: The checksum type should be configurable from the - // options. + // We chose to use CRC32 as the default option because // it performed better compared to xxHash64. // See the BenchmarkChecksum in table_test.go file // Size => 1024 B 2048 B // CRC32 => 63.7 ns/op 112 ns/op // xxHash64 => 87.5 ns/op 158 ns/op - Sum: y.CalculateChecksum(data, pb.Checksum_CRC32C), - Algo: pb.Checksum_CRC32C, + Sum: y.CalculateChecksum(data, ct), + Algo: ct, } // Write checksum to the file. diff --git a/table/table.go b/table/table.go index 1f90a73e5..e4caea5da 100644 --- a/table/table.go +++ b/table/table.go @@ -62,6 +62,9 @@ type Options struct { // ChkMode is the checksum verification mode for Table. ChkMode options.ChecksumVerificationMode + //ChkAlgo is the checksum algorithm mode for Table. + ChkAlgo pb.Checksum_Algorithm + // Options for Table builder. // BloomFalsePositive is the false positive probabiltiy of bloom filter. From b9dccf5c31e16790582db29b13caee6fd15ef310 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 12 May 2025 11:04:11 +0800 Subject: [PATCH 2/4] order import lib; add space at the begin --- options.go | 4 ++-- table/table.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/options.go b/options.go index f3998678d..100c87cc5 100644 --- a/options.go +++ b/options.go @@ -18,7 +18,6 @@ package badger import ( "fmt" - "github.com/dgraph-io/badger/v4/pb" "os" "reflect" "strconv" @@ -28,6 +27,7 @@ import ( "github.com/pkg/errors" "github.com/dgraph-io/badger/v4/options" + "github.com/dgraph-io/badger/v4/pb" "github.com/dgraph-io/badger/v4/table" "github.com/dgraph-io/badger/v4/y" "github.com/dgraph-io/ristretto/v2/z" @@ -107,7 +107,7 @@ type Options struct { // ChecksumVerificationMode decides when db should verify checksums for SSTable blocks. ChecksumVerificationMode options.ChecksumVerificationMode - //ChecksumAlgorithm decides which algorithm calculate checksums + // ChecksumAlgorithm decides which algorithm calculate checksums ChecksumAlgorithm pb.Checksum_Algorithm // DetectConflicts determines whether the transactions would be checked for diff --git a/table/table.go b/table/table.go index e4caea5da..6b02a9dc7 100644 --- a/table/table.go +++ b/table/table.go @@ -62,7 +62,7 @@ type Options struct { // ChkMode is the checksum verification mode for Table. ChkMode options.ChecksumVerificationMode - //ChkAlgo is the checksum algorithm mode for Table. + // ChkAlgo is the checksum algorithm mode for Table. ChkAlgo pb.Checksum_Algorithm // Options for Table builder. From a4b41cafcfa04fde2b467f67e5b61360ce920101 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 12 May 2025 11:05:29 +0800 Subject: [PATCH 3/4] handle checking checksum algorithm gracefully --- badger/cmd/info.go | 28 +++++++++++++++++++--------- badger/cmd/info_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 badger/cmd/info_test.go diff --git a/badger/cmd/info.go b/badger/cmd/info.go index 6dfbacd78..2128c2d1d 100644 --- a/badger/cmd/info.go +++ b/badger/cmd/info.go @@ -19,8 +19,8 @@ package cmd import ( "bytes" "encoding/hex" + stderrors "errors" "fmt" - "github.com/dgraph-io/badger/v4/pb" "io/fs" "os" "path/filepath" @@ -34,6 +34,7 @@ import ( "github.com/dgraph-io/badger/v4" "github.com/dgraph-io/badger/v4/options" + "github.com/dgraph-io/badger/v4/pb" "github.com/dgraph-io/badger/v4/table" "github.com/dgraph-io/badger/v4/y" ) @@ -58,6 +59,9 @@ type flagOptions struct { var ( opt flagOptions + + // ErrInvalidChecksumAlgorithm is returned if the checksum algorithm is invalid. + ErrInvalidChecksumAlgorithm = stderrors.New("Invalid checksum algorithm. Supported values: crc32c, xxhash64.") ) func init() { @@ -83,7 +87,8 @@ func init() { infoCmd.Flags().StringVar(&opt.encryptionKey, "enc-key", "", "Use the provided encryption key") infoCmd.Flags().StringVar(&opt.checksumVerificationMode, "cv-mode", "none", "[none, table, block, tableAndBlock] Specifies when the db should verify checksum for SST.") - infoCmd.Flags().StringVar(&opt.checksumAlgorithm, "ct", "crc32c", "[crc32c,xxhash64] Specifies the checksum algorithm for SST.") + infoCmd.Flags().StringVar(&opt.checksumAlgorithm, "ct", "crc32c", "[crc32c,xxhash64] "+ + "Specifies the checksum algorithm for SST.") infoCmd.Flags().BoolVar(&opt.discard, "discard", false, "Parse and print DISCARD file from value logs.") infoCmd.Flags().Uint16Var(&opt.externalMagicVersion, "external-magic", 0, @@ -104,7 +109,11 @@ to the Dgraph team. func handleInfo(cmd *cobra.Command, args []string) error { cvMode := checksumVerificationMode(opt.checksumVerificationMode) - ct := checksumAlgorithm(opt.checksumAlgorithm) + ct, err := strToChecksumAlgorithm(opt.checksumAlgorithm) + if err != nil { + y.Check(err) + } + bopt := badger.DefaultOptions(sstDir). WithValueDir(vlogDir). WithReadOnly(opt.readOnly). @@ -527,18 +536,19 @@ func pluralFiles(count int) string { return "files" } -func checksumAlgorithm(ct string) pb.Checksum_Algorithm { +// When the checkSum Algorithm is invalid, func strToChecksumAlgorithm will return the default checkSum Algorithm +func strToChecksumAlgorithm(ct string) (pb.Checksum_Algorithm, error) { switch ct { case "crc32c": - return pb.Checksum_CRC32C + return pb.Checksum_CRC32C, nil case "xxhash64": - return pb.Checksum_XXHash64 + return pb.Checksum_XXHash64, nil default: - fmt.Printf("Invalid checksum algorithm : %s\n", ct) - os.Exit(1) + return pb.Checksum_CRC32C, y.Wrap(ErrInvalidChecksumAlgorithm, + "InvalidChecksumAlgorithm") } - return pb.Checksum_CRC32C } + func checksumVerificationMode(cvMode string) options.ChecksumVerificationMode { switch cvMode { case "none": diff --git a/badger/cmd/info_test.go b/badger/cmd/info_test.go new file mode 100644 index 000000000..5f8eea33b --- /dev/null +++ b/badger/cmd/info_test.go @@ -0,0 +1,39 @@ +/* + * Copyright 2019 Dgraph Labs, Inc. and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "fmt" + "github.com/dgraph-io/badger/v4/pb" + "github.com/stretchr/testify/require" + "testing" +) + +func TestStrToChecksumAlgorithm(t *testing.T) { + ctCRC32, err := strToChecksumAlgorithm("crc32c") + require.True(t, ctCRC32 == pb.Checksum_CRC32C) + require.True(t, err == nil) + + ctHash, err := strToChecksumAlgorithm("xxhash64") + require.True(t, ctHash == pb.Checksum_XXHash64) + require.True(t, err == nil) + + ctOthers, err := strToChecksumAlgorithm("others") + fmt.Println(err) + require.True(t, ctOthers == pb.Checksum_CRC32C) + require.True(t, err != nil) +} From 0d8da258fc239cb8ada05f654c122adbb056f541 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 12 May 2025 17:11:41 +0800 Subject: [PATCH 4/4] fixed code style --- badger/cmd/info.go | 12 +++++------- badger/cmd/info_test.go | 3 ++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/badger/cmd/info.go b/badger/cmd/info.go index 2c091b50e..659dc72ca 100644 --- a/badger/cmd/info.go +++ b/badger/cmd/info.go @@ -8,7 +8,7 @@ package cmd import ( "bytes" "encoding/hex" - stderrors "errors" + "errors" "fmt" "io/fs" "os" @@ -48,8 +48,8 @@ type flagOptions struct { var ( opt flagOptions - // ErrInvalidChecksumAlgorithm is returned if the checksum algorithm is invalid. - ErrInvalidChecksumAlgorithm = stderrors.New("Invalid checksum algorithm. Supported values: crc32c, xxhash64.") + // errInvalidChecksumAlgorithm is returned if the checksum algorithm is invalid. + errInvalidChecksumAlgorithm = errors.New("Invalid checksum algorithm. Supported values: crc32c, xxhash64.") ) func init() { @@ -98,9 +98,7 @@ to the Dgraph team. func handleInfo(cmd *cobra.Command, args []string) error { cvMode := checksumVerificationMode(opt.checksumVerificationMode) ct, err := strToChecksumAlgorithm(opt.checksumAlgorithm) - if err != nil { - y.Check(err) - } + y.Check(err) bopt := badger.DefaultOptions(sstDir). WithValueDir(vlogDir). @@ -537,7 +535,7 @@ func strToChecksumAlgorithm(ct string) (pb.Checksum_Algorithm, error) { case "xxhash64": return pb.Checksum_XXHash64, nil default: - return pb.Checksum_CRC32C, y.Wrap(ErrInvalidChecksumAlgorithm, + return pb.Checksum_CRC32C, y.Wrap(errInvalidChecksumAlgorithm, "InvalidChecksumAlgorithm") } } diff --git a/badger/cmd/info_test.go b/badger/cmd/info_test.go index 5f8eea33b..3347d4780 100644 --- a/badger/cmd/info_test.go +++ b/badger/cmd/info_test.go @@ -18,9 +18,10 @@ package cmd import ( "fmt" + "testing" + "github.com/dgraph-io/badger/v4/pb" "github.com/stretchr/testify/require" - "testing" ) func TestStrToChecksumAlgorithm(t *testing.T) {