diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go
index d7f9ee10fcf..85c3772eec7 100644
--- a/go/cmd/dolt/commands/engine/sqlengine.go
+++ b/go/cmd/dolt/commands/engine/sqlengine.go
@@ -16,10 +16,10 @@ package engine
import (
"context"
- "fmt"
"os"
"strconv"
"strings"
+ "time"
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/eventscheduler"
@@ -43,7 +43,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/config"
@@ -189,7 +188,13 @@ func NewSqlEngine(
"authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig),
})
- statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider()))
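+ // Stats collection is gated on @@dolt_stats_enabled; when it is off, a no-op provider is installed.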
+ var statsPro sql.StatsProvider
+ _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled)
+ if enabled.(int8) == 1 {
+ statsPro = statspro.NewStatsCoord(ctx, pro, sqlEngine.NewDefaultContext, logrus.StandardLogger(), bThreads, mrEnv.GetEnv(mrEnv.GetFirstDatabase()))
+ } else {
+ statsPro = statspro.StatsNoop{}
+ }
engine.Analyzer.Catalog.StatsProvider = statsPro
engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
@@ -206,8 +211,28 @@ func NewSqlEngine(
// configuring stats depends on sessionBuilder
// sessionBuilder needs ref to statsProv
- if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil {
- fmt.Fprintln(cli.CliErr, err)
+ if sc, ok := statsPro.(*statspro.StatsCoord); ok {
+ _, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly)
+ sc.SetMemOnly(memOnly.(int8) == 1)
+
+ typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval)
+ _, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval)
+ _, brI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranchInterval)
+
+ jobInterval, _, _ := typ.GetType().Convert(jobI)
+ gcInterval, _, _ := typ.GetType().Convert(gcI)
+ brInterval, _, _ := typ.GetType().Convert(brI)
+
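+ // The interval system variables are in milliseconds; SetTimers expects nanosecond values.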
+ sc.SetTimers(
+ jobInterval.(int64)*int64(time.Millisecond),
+ gcInterval.(int64)*int64(time.Millisecond),
+ brInterval.(int64)*int64(time.Millisecond))
+
+ err := sc.Init(ctx, dbs, false)
+ if err != nil {
+ return nil, err
+ }
}
// Load MySQL Db information
diff --git a/go/cmd/dolt/commands/sqlserver/server.go b/go/cmd/dolt/commands/sqlserver/server.go
index 33d253a377a..3ae8cb70e45 100644
--- a/go/cmd/dolt/commands/sqlserver/server.go
+++ b/go/cmd/dolt/commands/sqlserver/server.go
@@ -55,6 +55,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/cluster"
_ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dfunctions"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqlserver"
"github.com/dolthub/dolt/go/libraries/events"
"github.com/dolthub/dolt/go/libraries/utils/config"
@@ -260,23 +261,23 @@ func ConfigureServices(
var sqlEngine *engine.SqlEngine
InitSqlEngine := &svcs.AnonService{
InitF: func(ctx context.Context) (err error) {
- if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil {
- // Auto-stats is off by default for every command except
- // sql-server. Unless the config specifies a specific
- // behavior, enable server stats collection.
- sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1)
- } else if statsOn != "0" {
- // do not bootstrap if auto-stats enabled
- } else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil {
- // If we've disabled stats collection and config does not
- // specify bootstrap behavior, enable bootstrapping.
- sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1)
- }
sqlEngine, err = engine.NewSqlEngine(
ctx,
mrEnv,
config,
)
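+ // Restart the stats coordinator's background worker now that the engine exists.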
+ if err != nil {
+ return err
+ }
+ if sc, ok := sqlEngine.GetUnderlyingEngine().Analyzer.Catalog.StatsProvider.(*statspro.StatsCoord); ok {
+ if sc == nil {
+ return fmt.Errorf("unexpected nil stats coord")
+ }
+ sqlCtx, err := sqlEngine.NewDefaultContext(ctx)
+ if err != nil {
+ return err
+ }
+ if err = sc.Restart(sqlCtx); err != nil {
+ return err
+ }
+ }
return err
},
StopF: func() error {
diff --git a/go/go.mod b/go/go.mod
index c9f89bfbc1d..35147a1df1d 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -56,7 +56,7 @@ require (
github.com/cespare/xxhash/v2 v2.2.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
- github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc
+ github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/dolthub/swiss v0.1.0
github.com/esote/minmaxheap v1.0.0
@@ -91,7 +91,6 @@ require (
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
golang.org/x/text v0.21.0
gonum.org/v1/plot v0.11.0
- gopkg.in/errgo.v2 v2.1.0
gopkg.in/go-jose/go-jose.v2 v2.6.3
gopkg.in/yaml.v3 v3.0.1
)
diff --git a/go/go.sum b/go/go.sum
index 5038e3a6cf6..8dbf9a92389 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -179,8 +179,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90 h1:Sni8jrP0sy/w9ZYXoff4g/ixe+7bFCZlfCqXKJSU+zM=
github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA=
-github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc h1:SdN7GRPtaqmLwfi6cVcyF4Oc8FbFUJ+mwsFRV++6iH4=
-github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc/go.mod h1:QQxZvPHOtycbC2bVmqmT6/Fov2g1/T1Rtm76wLd/Y1E=
+github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef h1:vQ5zStRSgdem9R3BtUhkVa5Q8DhSrYs9ReRVFIq86so=
+github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef/go.mod h1:QQxZvPHOtycbC2bVmqmT6/Fov2g1/T1Rtm76wLd/Y1E=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
@@ -1153,7 +1153,6 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
-gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
diff --git a/go/go.work.sum b/go/go.work.sum
index 71f195420ad..37de10bbf10 100644
--- a/go/go.work.sum
+++ b/go/go.work.sum
@@ -404,8 +404,6 @@ github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byA
github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss=
github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA=
github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE=
-github.com/esote/minmaxheap v1.0.0 h1:rgA7StnXXpZG6qlM0S7pUmEv1KpWe32rYT4x8J8ntaA=
-github.com/esote/minmaxheap v1.0.0/go.mod h1:Ln8+i7fS1k3PLgZI2JAo0iA1as95QnIYiGCrqSJ5FZk=
github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk=
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db h1:gb2Z18BhTPJPpLQWj4T+rfKHYCHxRHCtRxhKKjRidVw=
@@ -732,6 +730,7 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/cheggaaa/pb.v1 v1.0.25 h1:Ev7yu1/f6+d+b3pi5vPdRPc6nNtP1umSfcWiEfRqv6I=
+gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go
index e27a397915c..699f3ec0734 100644
--- a/go/libraries/doltcore/doltdb/doltdb.go
+++ b/go/libraries/doltcore/doltdb/doltdb.go
@@ -2052,7 +2052,7 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash RootValue,
return err
}
-func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error {
+func (ddb *DoltDB) SetStatistics(ctx context.Context, branch string, addr hash.Hash) error {
statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String())
if err != nil {
return err
diff --git a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go
index 8beeb5ea61a..c975e7e52f9 100644
--- a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go
+++ b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go
@@ -15,7 +15,7 @@
package reliable
import (
- "github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/circular"
+ "github.com/dolthub/dolt/go/libraries/utils/circular"
)
// A reliable.Chan is a type of channel transformer which can be used to build
diff --git a/go/libraries/doltcore/schema/statistic.go b/go/libraries/doltcore/schema/statistic.go
index 1879951e10b..88215a7443a 100644
--- a/go/libraries/doltcore/schema/statistic.go
+++ b/go/libraries/doltcore/schema/statistic.go
@@ -24,12 +24,12 @@ import (
const StatsVersion int64 = 1
const (
- StatsQualifierColName = "qualifier"
StatsDbColName = "database_name"
StatsTableColName = "table_name"
StatsIndexColName = "index_name"
- StatsPositionColName = "position"
+ StatsBranchName = "branch"
StatsCommitHashColName = "commit_hash"
+ StatsPrefixLenName = "prefix_len"
StatsRowCountColName = "row_count"
StatsDistinctCountColName = "distinct_count"
StatsNullCountColName = "null_count"
@@ -42,7 +42,7 @@ const (
StatsMcv2ColName = "mcv2"
StatsMcv3ColName = "mcv3"
StatsMcv4ColName = "mcv4"
- StatsMcvCountsColName = "mcvCounts"
+ StatsMcvCountsColName = "mcv_counts"
StatsVersionColName = "version"
)
@@ -52,6 +52,7 @@ const (
StatsIndexTag
StatsPositionTag
StatsVersionTag
+ StatsPrefixLenTag
StatsCommitHashTag
StatsRowCountTag
StatsDistinctCountTag
@@ -71,9 +72,9 @@ const (
func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
return sql.PrimaryKeySchema{
Schema: sql.Schema{
- &sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
- &sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
- &sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName},
+ &sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName},
+ &sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName},
+ &sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName},
&sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName},
@@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema {
&sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName},
&sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName},
},
- PkOrdinals: []int{0, 1},
}
}
@@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen()
func StatsTableDoltSchemaGen() Schema {
colColl := NewColCollection(
- NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}),
- NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}),
+ NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}),
+ NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}),
NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}),
NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}),
- NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}),
NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}),
- NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}),
NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false),
NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false),
NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false),
diff --git a/go/libraries/doltcore/sqle/clusterdb/database.go b/go/libraries/doltcore/sqle/clusterdb/database.go
index dd741a9a205..4577d2f3c4d 100644
--- a/go/libraries/doltcore/sqle/clusterdb/database.go
+++ b/go/libraries/doltcore/sqle/clusterdb/database.go
@@ -162,6 +162,10 @@ func (db database) RequestedName() string {
return db.Name()
}
+func (db database) AliasedName() string {
+ return db.Name()
+}
+
type noopRepoStateWriter struct{}
var _ env.RepoStateWriter = noopRepoStateWriter{}
diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go
index f75e5f52997..10c5e154999 100644
--- a/go/libraries/doltcore/sqle/database.go
+++ b/go/libraries/doltcore/sqle/database.go
@@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds
if err != nil {
return nil, false, err
}
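+ // No branch was resolved for this lookup; fall back to this revision database's branch.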
+ if branch == "" {
+ branch = db.Revision()
+ }
dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true
case doltdb.ProceduresTableName:
found = true
diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go
index 37c4affbb05..9e22ef72aeb 100644
--- a/go/libraries/doltcore/sqle/database_provider.go
+++ b/go/libraries/doltcore/sqle/database_provider.go
@@ -970,7 +970,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua
}
}
- db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
+ db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName)
// preserve original user case in the case of not found
if sql.ErrDatabaseNotFound.Is(err) {
return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName)
@@ -1511,8 +1511,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b
return "", false, nil
}
-// revisionDbForBranch returns a new database that is tied to the branch named by revSpec
-func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
+// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec
+func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) {
static := staticRepoState{
branch: ref.NewBranchRef(revSpec),
RepoStateWriter: srcDb.DbData().Rsw,
diff --git a/go/libraries/doltcore/sqle/dprocedures/init.go b/go/libraries/doltcore/sqle/dprocedures/init.go
index 499d4209886..5a00fcb39c2 100644
--- a/go/libraries/doltcore/sqle/dprocedures/init.go
+++ b/go/libraries/doltcore/sqle/dprocedures/init.go
@@ -47,12 +47,13 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{
{Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag},
{Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints},
- {Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)},
{Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)},
{Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)},
- {Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)},
- {Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)},
+ {Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)},
{Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)},
+ {Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)},
+ {Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)},
+ {Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)},
}
// stringSchema returns a non-nullable schema with all columns as LONGTEXT.
diff --git a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
index 139bec5e5d2..f8cc95850d2 100644
--- a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
+++ b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go
@@ -15,14 +15,14 @@
package dprocedures
import (
+ "context"
+ "encoding/json"
"fmt"
- "strings"
+ "strconv"
"github.com/dolthub/go-mysql-server/sql"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
@@ -34,9 +34,16 @@ var statsFuncSchema = []*sql.Column{
},
}
-func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
- return func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
- res, err := fn(ctx)
+const OkResult = "Ok"
+
+func statsFunc(fn func(ctx *sql.Context, args ...string) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) {
+ return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) {
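+ // Surface panics from stats internals as errors rather than crashing the server.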
+ defer func() {
+ if r := recover(); r != nil {
+ err = fmt.Errorf("stats function unexpectedly panicked: %s", r)
+ }
+ }()
+ res, err := fn(ctx, args...)
if err != nil {
return nil, err
}
@@ -44,124 +51,168 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con
}
}
-// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for
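+// StatsInfo is a snapshot of the stats coordinator's internal
+// counters; dolt_stats_info() returns it serialized as JSON.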
+type StatsInfo struct {
+ DbCnt int `json:"dbCnt"`
+ ReadCnt int `json:"readCnt"`
+ Active bool `json:"active"`
+ DbSeedCnt int `json:"dbSeedCnt"`
+ StorageBucketCnt int `json:"storageBucketCnt"`
+ CachedBucketCnt int `json:"cachedBucketCnt"`
+ CachedBoundCnt int `json:"cachedBoundCnt"`
+ CachedTemplateCnt int `json:"cachedTemplateCnt"`
+ StatCnt int `json:"statCnt"`
+ GcCounter int `json:"gcCounter"`
+ SyncCounter int `json:"syncCounter"`
+}
+
+func (si StatsInfo) ToJson() string {
+ jsonData, err := json.Marshal(si)
+ if err != nil {
+ return ""
+ }
+ return string(jsonData)
+}
+
+// ToggableStats is a sql.StatsProvider that exposes hooks for
// observing and manipulating background database auto refresh threads.
-type AutoRefreshStatsProvider interface {
+type ToggableStats interface {
sql.StatsProvider
- CancelRefreshThread(string)
- StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error
- ThreadStatus(string) string
- Prune(ctx *sql.Context) error
+ Restart(context.Context) error
+ Stop(context.Context) error
+ Info(ctx context.Context) (StatsInfo, error)
Purge(ctx *sql.Context) error
+ WaitForDbSync(ctx *sql.Context) error
+ Gc(ctx *sql.Context) error
+ SetTimers(int64, int64, int64)
}
type BranchStatsProvider interface {
DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error
}
-// statsRestart tries to stop and then start a refresh thread
-func statsRestart(ctx *sql.Context) (interface{}, error) {
+// statsRestart flushes the current job queue and re-initializes
+// all statistics databases.
+func statsRestart(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
- if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
- pro := dSess.Provider()
- newFs, err := pro.FileSystemForDatabase(dbName)
- if err != nil {
- return nil, fmt.Errorf("failed to restart stats collection: %w", err)
+ if afp, ok := statsPro.(ToggableStats); ok {
+ if err := afp.Restart(ctx); err != nil {
+ return nil, err
}
- dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO")
+ return OkResult, nil
+ }
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
+}
- sqlDb, ok := pro.BaseDatabase(ctx, dbName)
- if !ok {
- return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName)
+// statsInfo returns a JSON snapshot of the stats coordinator's current state.
+func statsInfo(ctx *sql.Context, _ ...string) (interface{}, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ pro := dSess.StatsProvider()
+ if afp, ok := pro.(ToggableStats); ok {
+ info, err := afp.Info(ctx)
+ if err != nil {
+ return nil, err
}
+ return info.ToJson(), nil
+ }
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
+}
- afp.CancelRefreshThread(dbName)
-
- err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb)
- if err != nil {
- return nil, fmt.Errorf("failed to restart collection: %w", err)
+// statsWait blocks until the job queue executes two full loops
+// of instructions, which will (1) pick up and (2) commit new
+// sets of index-bucket dependencies.
+func statsWait(ctx *sql.Context, _ ...string) (interface{}, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ pro := dSess.StatsProvider()
+ if afp, ok := pro.(ToggableStats); ok {
+ if err := afp.WaitForDbSync(ctx); err != nil {
+ return nil, err
}
- return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil
+ return OkResult, nil
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsStatus returns the last update for a stats thread
-func statsStatus(ctx *sql.Context) (interface{}, error) {
+// statsGc rewrites the cache to include only objects reachable
+// from the current root value.
+func statsGc(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
pro := dSess.StatsProvider()
- if afp, ok := pro.(AutoRefreshStatsProvider); ok {
- return afp.ThreadStatus(dbName), nil
+ if afp, ok := pro.(ToggableStats); ok {
+ if err := afp.Gc(ctx); err != nil {
+ return nil, err
+ }
+ return OkResult, nil
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsStop cancels a refresh thread
-func statsStop(ctx *sql.Context) (interface{}, error) {
+// statsStop flushes the job queue and leaves the stats provider
+// in a paused state.
+func statsStop(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
statsPro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
- if afp, ok := statsPro.(AutoRefreshStatsProvider); ok {
- afp.CancelRefreshThread(dbName)
- return fmt.Sprintf("stopped thread: %s", dbName), nil
+ if afp, ok := statsPro.(ToggableStats); ok {
+ if err := afp.Stop(ctx); err != nil {
+ return nil, err
+ }
+ return OkResult, nil
}
- return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider")
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
-// statsDrop deletes the stats ref
-func statsDrop(ctx *sql.Context) (interface{}, error) {
+// statsPurge flushes the job queue, deletes the current caches
+// and storage targets, re-initializes the tracked database
+// states, and returns with stats collection paused.
+func statsPurge(ctx *sql.Context, _ ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- pro := dSess.StatsProvider()
- dbName := strings.ToLower(ctx.GetCurrentDatabase())
+ pro, ok := dSess.StatsProvider().(ToggableStats)
+ if !ok {
+ return nil, fmt.Errorf("stats not persisted, cannot purge")
+ }
- branch, err := dSess.GetBranch()
+ err := pro.Stop(ctx)
if err != nil {
- return nil, fmt.Errorf("failed to drop stats: %w", err)
+ return nil, fmt.Errorf("failed to flush queue: %w", err)
}
- if afp, ok := pro.(AutoRefreshStatsProvider); ok {
- // currently unsafe to drop stats while running refresh
- afp.CancelRefreshThread(dbName)
- }
- if bsp, ok := pro.(BranchStatsProvider); ok {
- err := bsp.DropBranchDbStats(ctx, branch, dbName, true)
- if err != nil {
- return nil, fmt.Errorf("failed to drop stats: %w", err)
- }
+ if err := pro.Purge(ctx); err != nil {
+ return "failed to purge stats", err
}
- return fmt.Sprintf("deleted stats ref for %s", dbName), nil
+ return OkResult, nil
}
-// statsPrune replaces the current disk contents with only the currently
-// tracked in memory statistics.
-func statsPrune(ctx *sql.Context) (interface{}, error) {
+// statsTimers updates the stats timers, which go into effect after the next restart.
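+// Example (nanosecond values, illustrative): call dolt_stats_timers('500000000', '60000000000', '30000000000');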
+func statsTimers(ctx *sql.Context, args ...string) (interface{}, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
- if !ok {
- return nil, fmt.Errorf("stats not persisted, cannot purge")
+ statsPro := dSess.StatsProvider()
+
+ if len(args) != 3 {
+ return nil, fmt.Errorf("expected timer arguments (ns): (job, gc, sync)")
}
- if err := pro.Prune(ctx); err != nil {
- return "failed to prune stats databases", err
+ job, err := strconv.ParseInt(args[0], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("interval timer must be positive intergers")
}
- return "pruned all stats databases", nil
-}
-
-// statsPurge removes the stats database from disk
-func statsPurge(ctx *sql.Context) (interface{}, error) {
- dSess := dsess.DSessFromSess(ctx.Session)
- pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider)
- if !ok {
- return nil, fmt.Errorf("stats not persisted, cannot purge")
+ gc, err := strconv.ParseInt(args[1], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("interval timer must be positive intergers")
}
- if err := pro.Purge(ctx); err != nil {
- return "failed to purged databases", err
+ sync, err := strconv.ParseInt(args[2], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("interval arguments must be positive intergers")
+ }
+
+ if afp, ok := statsPro.(ToggableStats); ok {
+ afp.SetTimers(job, gc, sync)
+ return OkResult, nil
}
- return "purged all database stats", nil
+ return nil, fmt.Errorf("provider does not implement ToggableStats")
}
diff --git a/go/libraries/doltcore/sqle/dsess/session_db_provider.go b/go/libraries/doltcore/sqle/dsess/session_db_provider.go
index 3d4969bb114..05e72971747 100644
--- a/go/libraries/doltcore/sqle/dsess/session_db_provider.go
+++ b/go/libraries/doltcore/sqle/dsess/session_db_provider.go
@@ -122,6 +122,7 @@ type SqlDatabase interface {
sql.Database
sql.SchemaDatabase
sql.DatabaseSchema
+ sql.AliasedDatabase
SessionDatabase
RevisionDatabase
diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go
index 848ed2218ec..0d8e0fd4edb 100644
--- a/go/libraries/doltcore/sqle/dsess/variables.go
+++ b/go/libraries/doltcore/sqle/dsess/variables.go
@@ -59,12 +59,12 @@ const (
DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch"
DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs"
- DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled"
- DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled"
- DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold"
- DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval"
- DoltStatsMemoryOnly = "dolt_stats_memory_only"
- DoltStatsBranches = "dolt_stats_branches"
+ DoltStatsEnabled = "dolt_stats_enabled"
+ DoltStatsMemoryOnly = "dolt_stats_memory_only"
+ DoltStatsBranches = "dolt_stats_branches"
+ DoltStatsJobInterval = "dolt_stats_job_interval"
+ DoltStatsBranchInterval = "dolt_stats_branch_interval"
+ DoltStatsGCInterval = "dolt_stats_gc_interval"
)
const URLTemplateDatabasePlaceholder = "{database}"
diff --git a/go/libraries/doltcore/sqle/dtables/statistics_table.go b/go/libraries/doltcore/sqle/dtables/statistics_table.go
index fda463e7e49..f73cfaf192b 100644
--- a/go/libraries/doltcore/sqle/dtables/statistics_table.go
+++ b/go/libraries/doltcore/sqle/dtables/statistics_table.go
@@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) {
}
type BranchStatsProvider interface {
- GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error)
+ GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error)
}
// RowCount implements sql.StatisticsTable
@@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) {
// PartitionRows is a sql.Table interface function that gets a row iterator for a partition
func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) {
dSess := dsess.DSessFromSess(ctx.Session)
- statsPro := dSess.StatsProvider().(BranchStatsProvider)
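+ // A provider that does not track branch stats (e.g. statspro.StatsNoop) yields an empty table.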
+ statsPro, ok := dSess.StatsProvider().(BranchStatsProvider)
+ if !ok {
+ return sql.RowsToRowIter(), nil
+ }
var dStats []sql.Statistic
for _, table := range st.tableNames {
dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table)
if err != nil {
return nil, err
}
- dStats = append(dStats, dbStats...)
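+ // Append element-wise: []*stats.Statistic does not convert directly to []sql.Statistic.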
+ for _, s := range dbStats {
+ dStats = append(dStats, s)
+ }
}
return stats.NewStatsIter(ctx, dStats...)
}
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
index 0c76b18fa51..903408a5f62 100644
--- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
@@ -1452,11 +1452,6 @@ func TestStatBranchTests(t *testing.T) {
RunStatBranchTests(t, harness)
}
-func TestStatsFunctions(t *testing.T) {
- harness := newDoltEnginetestHarness(t)
- RunStatsFunctionsTest(t, harness)
-}
-
func TestDiffTableFunction(t *testing.T) {
harness := newDoltEnginetestHarness(t)
RunDiffTableFunctionTests(t, harness)
@@ -1663,11 +1658,6 @@ func TestStatsStorage(t *testing.T) {
RunStatsStorageTests(t, h)
}
-func TestStatsIOWithoutReload(t *testing.T) {
- h := newDoltEnginetestHarness(t)
- RunStatsIOTestsWithoutReload(t, h)
-}
-
func TestJoinStats(t *testing.T) {
h := newDoltEnginetestHarness(t)
RunJoinStatsTests(t, h)
@@ -1953,22 +1943,23 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) {
// Setting an interval of 0 and a threshold of 0 will result
// in the stats being updated after every operation
- intervalSec := time.Duration(0)
- thresholdf64 := 0.
- bThreads := sql.NewBackgroundThreads()
- branches := []string{"main"}
- statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider)
+ statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsCoord)
// it is important to use new sessions for this test, to avoid working root conflicts
readCtx := enginetest.NewSession(harness)
writeCtx := enginetest.NewSession(harness)
refreshCtx := enginetest.NewSession(harness)
- newCtx := func(context.Context) (*sql.Context, error) {
- return refreshCtx, nil
- }
- err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches)
+ fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName())
+ require.NoError(t, err)
+
+ statsProv.AddFs(sqlDb, fs)
require.NoError(t, err)
+ <-done
execQ := func(ctx *sql.Context, q string, id int, tag string) {
_, iter, _, err := engine.Query(ctx, q)
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
index efd221635f4..0747f743b1b 100755
--- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go
@@ -268,7 +268,6 @@ func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) {
}
defer harness.Close()
- sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0)
enginetest.TestQueryPlans(t, harness, queries.PlanTests)
}
@@ -1165,21 +1164,6 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) enginetest.QueryEngine {
return e
}
-func RunStatsFunctionsTest(t *testing.T, harness DoltEnginetestHarness) {
- defer harness.Close()
- for _, test := range StatProcTests {
- t.Run(test.Name, func(t *testing.T) {
- // reset engine so provider statistics are clean
- harness = harness.NewHarness(t).WithConfigureStats(true)
- harness.Setup(setup.MydbData)
- harness.SkipSetupCommit()
- e := mustNewEngine(t, harness)
- defer e.Close()
- enginetest.TestScriptWithEngine(t, e, harness, test)
- })
- }
-}
-
func RunDiffTableFunctionTests(t *testing.T, harness DoltEnginetestHarness) {
for _, test := range DiffTableFunctionScriptTests {
t.Run(test.Name, func(t *testing.T) {
@@ -1562,27 +1546,12 @@ func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) {
for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
func() {
h = h.NewHarness(t).WithConfigureStats(true)
- defer h.Close()
e := mustNewEngine(t, h)
if enginetest.IsServerEngine(e) {
return
}
defer e.Close()
- TestProviderReloadScriptWithEngine(t, e, h, script)
- }()
- }
-}
-
-func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) {
- for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) {
- func() {
- h = h.NewHarness(t).WithConfigureStats(true)
defer h.Close()
- e := mustNewEngine(t, h)
- if enginetest.IsServerEngine(e) {
- return
- }
- defer e.Close()
enginetest.TestScriptWithEngine(t, e, h, script)
}()
}
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
index 4dbcd2be283..20bd5de519e 100644
--- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go
@@ -20,6 +20,7 @@ import (
"runtime"
"strings"
"testing"
+ "time"
gms "github.com/dolthub/go-mysql-server"
"github.com/dolthub/go-mysql-server/enginetest"
@@ -36,7 +37,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -46,7 +46,7 @@ import (
type DoltHarness struct {
t *testing.T
provider dsess.DoltDatabaseProvider
- statsPro sql.StatsProvider
+ statsPro *statspro.StatsCoord
multiRepoEnv *env.MultiRepoEnv
session *dsess.DoltSession
branchControl *branch_control.Controller
@@ -246,13 +246,20 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
d.gcSafepointController = dsess.NewGCSafepointController()
- statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
- d.statsPro = statsProv
-
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController)
require.NoError(t, err)
+ sqlCtx := enginetest.NewContext(d)
+ bThreads := sql.NewBackgroundThreads()
+
+ ctxGen := func(ctx context.Context) (*sql.Context, error) {
+ return d.NewContextWithClient(sql.Client{Address: "localhost", User: "root"}), nil
+ }
+ statsPro := statspro.NewStatsCoord(ctx, doltProvider, ctxGen, sqlCtx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
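+ // Run stats jobs near-continuously in tests; GC and branch sync fire once per second.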
+ statsPro.SetTimers(int64(1*time.Nanosecond), int64(1*time.Second), int64(1*time.Second))
+ d.statsPro = statsPro
+
e, err := enginetest.NewEngine(t, d, d.provider, d.setupData, d.statsPro)
if err != nil {
return nil, err
@@ -260,8 +267,8 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
e.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{})
d.engine = e
- sqlCtx := enginetest.NewContext(d)
databases := pro.AllDatabases(sqlCtx)
+
d.setupDbs = make(map[string]struct{})
var dbs []string
for _, db := range databases {
@@ -281,23 +288,21 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
require.NoError(t, err)
}
- if d.configureStats {
- bThreads := sql.NewBackgroundThreads()
- e = e.WithBackgroundThreads(bThreads)
+ e = e.WithBackgroundThreads(bThreads)
- dSess := dsess.DSessFromSess(sqlCtx.Session)
- dbCache := dSess.DatabaseCache(sqlCtx)
-
- dsessDbs := make([]dsess.SqlDatabase, len(dbs))
- for i, dbName := range dbs {
- dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName)
+ if d.configureStats {
+ var dsessDbs []dsess.SqlDatabase
+ for _, db := range databases {
+ if sqlDb, ok := db.(dsess.SqlDatabase); ok {
+ dsessDbs = append(dsessDbs, sqlDb)
+ }
}
-
- ctxFact := func(context.Context) (*sql.Context, error) {
- sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"})
- return sql.NewContext(context.Background(), sql.WithSession(sess)), nil
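+ // Seed the coordinator with every session database before starting its worker.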
+ if err := statsPro.Init(ctx, dsessDbs, false); err != nil {
+ return nil, err
}
- if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil {
+
+ err = statsPro.Restart(ctx)
+ if err != nil {
return nil, err
}
@@ -309,13 +314,20 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) {
}
// Reset the mysql DB table to a clean state for this new engine
+ ctx := enginetest.NewContext(d)
+
d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb()
d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount()
- d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
- var err error
- sqlCtx := enginetest.NewContext(d)
- e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
+ ctxGen := func(ctx context.Context) (*sql.Context, error) {
+ return d.NewContext(), nil
+ }
+ bThreads := sql.NewBackgroundThreads()
+ statsPro := statspro.NewStatsCoord(ctx, d.provider.(*sqle.DoltDatabaseProvider), ctxGen, ctx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase()))
+ require.NoError(t, statsPro.Restart(ctx))
+ d.engine.Analyzer.Catalog.StatsProvider = statsPro
+
+ e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation())
// Get a fresh session after running setup scripts, since some setup scripts can change the session state
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -430,7 +442,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database {
doltProvider, ok := pro.(*sqle.DoltDatabaseProvider)
require.True(d.t, ok)
d.provider = doltProvider
- d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider()))
var err error
d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil)
@@ -502,7 +513,10 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider {
func (d *DoltHarness) Close() {
d.closeProvider()
- sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0))
+ if d.statsPro != nil {
+ d.statsPro.Close()
+ }
+ sql.SystemVariables.SetGlobal(dsess.DoltStatsEnabled, int8(0))
}
func (d *DoltHarness) closeProvider() {
diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
index fedb7297d5f..3efc0a41288 100644
--- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go
+++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go
@@ -17,17 +17,12 @@ package enginetest
import (
"fmt"
"strings"
- "testing"
- gms "github.com/dolthub/go-mysql-server"
- "github.com/dolthub/go-mysql-server/enginetest"
"github.com/dolthub/go-mysql-server/enginetest/queries"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
- "github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
)
// fillerVarchar pushes the tree into level 3
@@ -510,8 +505,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
{
Name: "incremental stats deletes auto",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
"analyze table xy",
@@ -525,10 +518,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "delete from xy where x > 500",
},
{
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.1)",
+ Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -540,8 +530,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{
// https://github.com/dolthub/dolt/issues/8504
Name: "alter index column type",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -569,78 +557,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{
},
},
},
- {
- Name: "differentiate table cases",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main'",
- "CREATE table XY (x bigint primary key, y varchar(16))",
- "insert into XY values (0,'0'), (1,'1'), (2,'2')",
- "analyze table XY",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- },
- },
- {
- Name: "deleted table loads OK",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main'",
- "CREATE table xy (x bigint primary key, y varchar(16))",
- "insert into xy values (0,'0'), (1,'1'), (2,'2')",
- "analyze table xy",
- "CREATE table uv (u bigint primary key, v varchar(16))",
- "insert into uv values (0,'0'), (1,'1'), (2,'2')",
- "analyze table uv",
- "drop table uv",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- },
- },
- {
- Name: "differentiate branch names",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches ='main,feat'",
- "CREATE table xy (x bigint primary key, y varchar(16))",
- "insert into xy values (0,'0'), (1,'1'), (2,'2')",
- "analyze table xy",
- "call dolt_checkout('-b', 'feat')",
- "CREATE table xy (x varchar(16) primary key, y bigint, z bigint)",
- "insert into xy values (3,'3',3)",
- "analyze table xy",
- "call dolt_checkout('main')",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "2"}},
- },
- {
- Query: "call dolt_checkout('feat')",
- },
- {
- Query: "select table_name, upper_bound from dolt_statistics",
- Expected: []sql.Row{{"xy", "3"}},
- },
- },
- },
{
Name: "drop primary key",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
"CREATE table xy (x bigint primary key, y varchar(16))",
"insert into xy values (0,'0'), (1,'1'), (2,'2')",
"analyze table xy",
@@ -657,10 +576,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{
Query: "insert into xy values ('3', '3')",
},
{
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.2)",
+ Query: "analyze table xy",
},
{
Query: "select count(*) from dolt_statistics group by table_name, index_name",
@@ -674,9 +590,6 @@ var StatBranchTests = []queries.ScriptTest{
{
Name: "multi branch stats",
SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;",
- "set @@PERSIST.dolt_stats_branches = 'main,feat';",
"CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
"insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')",
"call dolt_commit('-Am', 'xy')",
@@ -688,10 +601,7 @@ var StatBranchTests = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.1)",
+ Query: "call dolt_stats_sync()",
},
{
Query: "select table_name, index_name, row_count from dolt_statistics",
@@ -726,7 +636,7 @@ var StatBranchTests = []queries.ScriptTest{
Query: "call dolt_commit('-am', 'cm')",
},
{
- Query: "select sleep(.1)",
+ Query: "call dolt_stats_wait()",
},
{
Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
@@ -744,30 +654,6 @@ var StatBranchTests = []queries.ScriptTest{
{"xy", "y", uint64(6)},
},
},
- {
- Query: "call dolt_checkout('feat')",
- },
- {
- Query: "call dolt_stats_stop()",
- },
- {
- Query: "select sleep(.1)",
- },
- {
- Query: "call dolt_stats_drop()",
- },
- {
- Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'",
- Expected: []sql.Row{},
- },
- {
- // we dropped 'feat', not 'main'
- Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'",
- Expected: []sql.Row{
- {"xy", "primary", uint64(6)},
- {"xy", "y", uint64(6)},
- },
- },
},
},
{
@@ -787,302 +673,3 @@ var StatBranchTests = []queries.ScriptTest{
},
},
}
-
-var StatProcTests = []queries.ScriptTest{
- {
- Name: "deleting stats removes information_schema access point",
- SetUpScript: []string{
- "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
- "insert into xy values (0,0,0)",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "analyze table xy",
- },
- {
- Query: "select count(*) from information_schema.column_statistics",
- Expected: []sql.Row{{2}},
- },
- {
- Query: "call dolt_stats_drop()",
- },
- {
- Query: "select count(*) from information_schema.column_statistics",
- Expected: []sql.Row{{0}},
- },
- },
- },
- {
- Name: "restart empty stats panic",
- SetUpScript: []string{
- "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "analyze table xy",
- },
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{0}},
- },
- {
- Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0",
- Expected: []sql.Row{{}},
- },
- {
- Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0",
- Expected: []sql.Row{{}},
- },
- {
- // don't panic
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.1)",
- },
- {
- Query: "insert into xy values (0,0,0)",
- },
- {
- Query: "select sleep(.1)",
- },
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{2}},
- },
- },
- },
- {
- Name: "basic start, status, stop loop",
- SetUpScript: []string{
- "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
- "insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{0}},
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"no active stats thread"}},
- },
- // set refresh interval arbitrarily high to avoid updating when we restart
- {
- Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;",
- Expected: []sql.Row{{}},
- },
- {
- Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0",
- Expected: []sql.Row{{}},
- },
- {
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"restarted thread: mydb"}},
- },
- {
- Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;",
- Expected: []sql.Row{{}},
- },
- // new restart picks up 0-interval, will start refreshing immediately
- {
- Query: "call dolt_stats_restart()",
- },
- {
- Query: "select sleep(.1)",
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"refreshed mydb"}},
- },
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{2}},
- },
- // kill refresh thread
- {
- Query: "call dolt_stats_stop()",
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"cancelled thread: mydb"}},
- },
- // insert without refresh thread will not update stats
- {
- Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')",
- },
- {
- Query: "select sleep(.1)",
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"cancelled thread: mydb"}},
- },
- // manual analyze will update stats
- {
- Query: "analyze table xy",
- Expected: []sql.Row{{"xy", "analyze", "status", "OK"}},
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"refreshed mydb"}},
- },
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{2}},
- },
- // kill refresh thread and delete stats ref
- {
- Query: "call dolt_stats_drop()",
- },
- {
- Query: "call dolt_stats_status()",
- Expected: []sql.Row{{"dropped"}},
- },
- {
- Query: "select count(*) from dolt_statistics",
- Expected: []sql.Row{{0}},
- },
- },
- },
- {
- Name: "test purge",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
- "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
- "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
- "analyze table xy",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
- Expected: []sql.Row{{1}, {1}},
- },
- {
- Query: "call dolt_stats_purge()",
- },
- {
- Query: "select count(*) from dolt_statistics;",
- Expected: []sql.Row{{0}},
- },
- },
- },
- {
- Name: "test prune",
- SetUpScript: []string{
- "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;",
- "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));",
- "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');",
- "analyze table xy",
- },
- Assertions: []queries.ScriptTestAssertion{
- {
- Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt",
- Expected: []sql.Row{{1}, {1}},
- },
- {
- Query: "call dolt_stats_prune()",
- },
- {
- Query: "select count(*) from dolt_statistics;",
- Expected: []sql.Row{{2}},
- },
- },
- },
-}
-
-// TestProviderReloadScriptWithEngine runs the test script given with the engine provided.
-func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) {
- ctx := enginetest.NewContext(harness)
- err := enginetest.CreateNewConnectionForServerEngine(ctx, e)
- require.NoError(t, err, nil)
-
- t.Run(script.Name, func(t *testing.T) {
- for _, statement := range script.SetUpScript {
- if sh, ok := harness.(enginetest.SkippingHarness); ok {
- if sh.SkipQueryTest(statement) {
- t.Skip()
- }
- }
- ctx = ctx.WithQuery(statement)
- enginetest.RunQueryWithContext(t, e, harness, ctx, statement)
- }
-
- assertions := script.Assertions
- if len(assertions) == 0 {
- assertions = []queries.ScriptTestAssertion{
- {
- Query: script.Query,
- Expected: script.Expected,
- ExpectedErr: script.ExpectedErr,
- ExpectedIndexes: script.ExpectedIndexes,
- },
- }
- }
-
- {
- // reload provider, get disk stats
- eng, ok := e.(*gms.Engine)
- if !ok {
- t.Errorf("expected *gms.Engine but found: %T", e)
- }
-
- branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb")
- brCopy := make([]string, len(branches))
- copy(brCopy, branches)
- err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false)
- require.NoError(t, err)
- for _, branch := range brCopy {
- err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch)
- require.NoError(t, err)
- }
- }
-
- for _, assertion := range assertions {
- t.Run(assertion.Query, func(t *testing.T) {
- if assertion.NewSession {
- th, ok := harness.(enginetest.TransactionHarness)
- require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+
- "but harness doesn't implement TransactionHarness")
- ctx = th.NewSession()
- }
-
- if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) {
- t.Skip()
- }
- if assertion.Skip {
- t.Skip()
- }
-
- if assertion.ExpectedErr != nil {
- enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr)
- } else if assertion.ExpectedErrStr != "" {
- enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr)
- } else if assertion.ExpectedWarning != 0 {
- enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query,
- assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount,
- assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
- } else if assertion.SkipResultsCheck {
- enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query)
- } else if assertion.CheckIndexedAccess {
- enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
- } else {
- var expected = assertion.Expected
- if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine {
- // TODO: remove this check in the future
- expected = nil
- }
- enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil)
- }
- })
- }
- })
-}
-
-func mustNewStatQual(s string) sql.StatQualifier {
- qual, _ := sql.NewQualifierFromString(s)
- return qual
-}
diff --git a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go
index 1bd7861ebaf..ac93e62d733 100644
--- a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go
+++ b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go
@@ -33,7 +33,6 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/env"
dsql "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
@@ -145,7 +144,7 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error {
}
config, _ := dEnv.Config.GetConfig(env.GlobalConfig)
- sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.NewProvider(pro.(*dsql.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(env.NewGRPCDialProviderFromDoltEnv(dEnv))), dsess.NewGCSafepointController())
+ sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.StatsNoop{}, dsess.NewGCSafepointController())
h.sess = sqlCtx.Session.(*dsess.DoltSession)
dbs := h.engine.Analyzer.Catalog.AllDatabases(sqlCtx)
diff --git a/go/libraries/doltcore/sqle/sqlddl_test.go b/go/libraries/doltcore/sqle/sqlddl_test.go
index e0cea917018..7088079dd86 100644
--- a/go/libraries/doltcore/sqle/sqlddl_test.go
+++ b/go/libraries/doltcore/sqle/sqlddl_test.go
@@ -1128,6 +1128,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co
IsServerLocked: false,
}), sqlCtx
}
+
func TestIndexOverwrite(t *testing.T) {
ctx := context.Background()
dEnv := dtestutils.CreateTestEnv()
diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go
deleted file mode 100644
index 6a972a3b103..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/database.go
+++ /dev/null
@@ -1,488 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "context"
- "errors"
- "fmt"
- "path"
- "strings"
- "sync"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
- "github.com/dolthub/dolt/go/libraries/utils/earl"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
- "github.com/dolthub/dolt/go/store/datas"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/types"
-)
-
-func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory {
- return &NomsStatsFactory{dialPro: dialPro}
-}
-
-type NomsStatsFactory struct {
- dialPro dbfactory.GRPCDialProvider
-}
-
-var _ statspro.StatsFactory = NomsStatsFactory{}
-
-func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) {
- params := make(map[string]interface{})
- params[dbfactory.GRPCDialProviderParam] = sf.dialPro
-
- var urlPath string
- u, err := earl.Parse(prov.DbFactoryUrl())
- if u.Scheme == dbfactory.MemScheme {
- urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir)
- } else if u.Scheme == dbfactory.FileScheme {
- urlPath = doltdb.LocalDirDoltDB
- }
-
- statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
- if err != nil {
- return nil, err
- }
-
- var dEnv *env.DoltEnv
- exists, isDir := statsFs.Exists("")
- if !exists {
- err := statsFs.MkDirs("")
- if err != nil {
- return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
- }
-
- dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test")
- sess := dsess.DSessFromSess(ctx.Session)
- err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch())
- if err != nil {
- return nil, err
- }
- } else if !isDir {
- return nil, fmt.Errorf("file exists where the dolt stats directory should be")
- } else {
- dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "")
- }
-
- dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params)
-
- deaf := dEnv.DbEaFactory(ctx)
-
- tmpDir, err := dEnv.TempTableFilesDir()
- if err != nil {
- return nil, err
- }
- opts := editor.Options{
- Deaf: deaf,
- Tempdir: tmpDir,
- }
- statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
- if err != nil {
- return nil, err
- }
- return NewNomsStats(sourceDb, statsDb), nil
-}
-
-func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase {
- return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb}
-}
-
-type dbStats map[sql.StatQualifier]*statspro.DoltStats
-
-type NomsStatsDatabase struct {
- mu *sync.Mutex
- destDb dsess.SqlDatabase
- sourceDb dsess.SqlDatabase
- stats []dbStats
- branches []string
- tableHashes []map[string]hash.Hash
- schemaHashes []map[string]hash.Hash
- dirty []*prolly.MutableMap
-}
-
-var _ statspro.Database = (*NomsStatsDatabase)(nil)
-
-func (n *NomsStatsDatabase) Close() error {
- return n.destDb.DbData().Ddb.Close()
-}
-
-func (n *NomsStatsDatabase) Branches() []string {
- return n.branches
-}
-
-func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error {
- branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch)
-
- dSess := dsess.DSessFromSess(ctx.Session)
- sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
- if err != nil {
- ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch)
- return nil
- }
- branchQDb, ok := sqlDb.(dsess.SqlDatabase)
- if !ok {
- return fmt.Errorf("branch/database not found: %s", branchQDbName)
- }
-
- if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil {
- return err
- } else if ok {
- ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name())
- if err := n.DeleteBranchStats(ctx, branch, true); err != nil {
- return err
- }
- }
-
- statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch)
- if errors.Is(err, doltdb.ErrNoStatistics) {
- return n.trackBranch(ctx, branch)
- } else if errors.Is(err, datas.ErrNoBranchStats) {
- return n.trackBranch(ctx, branch)
- } else if err != nil {
- return err
- }
- if cnt, err := statsMap.Count(); err != nil {
- return err
- } else if cnt == 0 {
- return n.trackBranch(ctx, branch)
- }
-
- doltStats, err := loadStats(ctx, branchQDb, statsMap)
- if err != nil {
- return err
- }
- n.branches = append(n.branches, branch)
- n.stats = append(n.stats, doltStats)
- n.dirty = append(n.dirty, nil)
- n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
- n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
- return nil
-}
-
-func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) {
- root, err := branchQDb.GetRoot(ctx)
- if err != nil {
- return false, err
- }
- tables, err := branchQDb.GetTableNames(ctx)
- if err != nil {
- return false, err
- }
-
- var keys []string
- var schHashes []hash.Hash
- for _, tableName := range tables {
- table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName})
- if err != nil {
- return false, err
- }
- if !ok {
- return false, nil
- }
- curHash, err := table.GetSchemaHash(ctx)
- if err != nil {
- return false, err
- }
-
- keys = append(keys, n.schemaTupleKey(branch, tableName))
- schHashes = append(schHashes, curHash)
- }
-
- ddb := n.destDb.DbData().Ddb
- var schemaChange bool
- for i, key := range keys {
- curHash := schHashes[i]
- if val, ok, err := ddb.GetTuple(ctx, key); err != nil {
- return false, err
- } else if ok {
- oldHash := hash.Parse(string(val))
- if !ok || !oldHash.Equal(curHash) {
- schemaChange = true
- break
- }
- }
- }
- if schemaChange {
- for _, key := range keys {
- ddb.DeleteTuple(ctx, key)
- }
- return true, nil
- }
- return false, nil
-}
-
-func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats {
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- return n.stats[i]
- }
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) {
- n.mu.Lock()
- defer n.mu.Unlock()
- stats := n.getBranchStats(branch)
- ret, ok := stats[qual]
- return ret, ok
-}
-
-func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier {
- n.mu.Lock()
- defer n.mu.Unlock()
- stats := n.getBranchStats(branch)
- var ret []sql.StatQualifier
- for qual, _ := range stats {
- ret = append(ret, qual)
- }
- return ret
-}
-
-func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
- var statsMap *prolly.MutableMap
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- n.stats[i][qual] = stats
- if n.dirty[i] == nil {
- if err := n.initMutable(ctx, i); err != nil {
- return err
- }
- }
- statsMap = n.dirty[i]
- }
- }
- if statsMap == nil {
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- statsMap = n.dirty[len(n.branches)-1]
- n.stats[len(n.branches)-1][qual] = stats
- }
-
- return n.replaceStats(ctx, statsMap, stats)
-}
-func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- return n.setStat(ctx, branch, qual, stats)
-}
-
-func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error {
- n.branches = append(n.branches, branch)
- n.stats = append(n.stats, make(dbStats))
- n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash))
- n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash))
-
- kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
- newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd)
- if err != nil {
- return err
- }
- n.dirty = append(n.dirty, newMap.Mutate())
- return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf())
-}
-
-func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error {
- statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i])
- if err != nil {
- return err
- }
- n.dirty[i] = statsMap.Mutate()
- return nil
-}
-
-func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- for _, qual := range quals {
- ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual)
- delete(n.stats[i], qual)
- }
- }
- }
-}
-
-func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch)
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- n.branches = append(n.branches[:i], n.branches[i+1:]...)
- n.dirty = append(n.dirty[:i], n.dirty[i+1:]...)
- n.stats = append(n.stats[:i], n.stats[i+1:]...)
- n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...)
- n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...)
- }
- }
- if flush {
- return n.destDb.DbData().Ddb.DropStatisics(ctx, branch)
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- var dbStat dbStats
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- // naively merge the new chunks with the old
- dbStat = n.stats[i]
- }
- }
-
- if dbStat == nil {
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- dbStat = n.stats[len(n.branches)-1]
- }
-
- if _, ok := dbStat[qual]; ok {
- oldChunks := dbStat[qual].Hist
- targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks)
- if err != nil {
- return err
- }
- newStat, err := dbStat[qual].WithHistogram(targetBuckets)
- if err != nil {
- return err
- }
- dbStat[qual] = newStat.(*statspro.DoltStats)
- } else {
- dbStat[qual] = statspro.NewDoltStats()
- }
- dbStat[qual].Chunks = targetHashes
- dbStat[qual].UpdateActive()
-
- // let |n.SetStats| update memory and disk
- return n.setStat(ctx, branch, qual, dbStat[qual])
-}
-
-func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error {
- n.mu.Lock()
- defer n.mu.Unlock()
-
- for i, b := range n.branches {
- if strings.EqualFold(b, branch) {
- if n.dirty[i] != nil {
- flushedMap, err := n.dirty[i].Map(ctx)
- if err != nil {
- return err
- }
- n.dirty[i] = nil
- if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil {
- return err
- }
- return nil
- }
- }
- }
- return nil
-}
-
-func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- return n.tableHashes[i][tableName]
- }
- }
- return hash.Hash{}
-}
-
-func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- n.tableHashes[i][tableName] = h
- break
- }
- }
-}
-
-func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) {
- n.mu.Lock()
- defer n.mu.Unlock()
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- return n.schemaHashes[i][tableName], nil
- }
- if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok {
- if err != nil {
- return hash.Hash{}, err
- }
- h := hash.Parse(string(val))
- n.schemaHashes[i][tableName] = h
- return h, nil
- } else if err != nil {
- return hash.Hash{}, err
- }
- break
- }
- return hash.Hash{}, nil
-}
-
-func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string {
- return n.sourceDb.Name() + "/" + branch + "/" + tableName
-}
-
-func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error {
- n.mu.Lock()
- defer n.mu.Unlock()
- branchIdx := -1
- for i, b := range n.branches {
- if strings.EqualFold(branch, b) {
- branchIdx = i
- break
- }
- }
- if branchIdx < 0 {
- branchIdx = len(n.branches)
- if err := n.trackBranch(ctx, branch); err != nil {
- return err
- }
- }
-
- n.schemaHashes[branchIdx][tableName] = h
- key := n.schemaTupleKey(branch, tableName)
- if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound {
- return err
- }
-
- return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String()))
-}
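
NomsStatsDatabase invalidates statistics by persisting one schema hash per db/branch/table tuple (see schemaTupleKey above) and purging stats when the stored hash diverges. The rule in isolation, with a hypothetical kv interface standing in for the DoltDB tuple store:

    // kv is a hypothetical stand-in for the tuple store used above.
    type kv interface {
    	Get(key string) (val string, ok bool)
    	Set(key, val string)
    }

    // schemaChanged mirrors the SchemaChange/SetSchemaHash flow: record the
    // hash on first sight, and report a change when the stored hash diverges
    // so the caller can purge that table's statistics.
    func schemaChanged(store kv, db, branch, table, curHash string) bool {
    	key := db + "/" + branch + "/" + table // same shape as schemaTupleKey
    	old, ok := store.Get(key)
    	if !ok {
    		store.Set(key, curHash)
    		return false
    	}
    	if old != curHash {
    		store.Set(key, curHash)
    		return true
    	}
    	return false
    }
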
diff --git a/go/libraries/doltcore/sqle/statsnoms/iter.go b/go/libraries/doltcore/sqle/statsnoms/iter.go
deleted file mode 100644
index 59b9456eed6..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/iter.go
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/planbuilder"
- "gopkg.in/errgo.v2/errors"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-var ErrIncompatibleVersion = errors.New("client stats version mismatch")
-
-func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) {
- iter, err := m.IterAll(ctx)
- if err != nil {
- return nil, err
- }
- kd, vd := m.Descriptors()
- keyBuilder := val.NewTupleBuilder(kd)
- valueBuilder := val.NewTupleBuilder(vd)
- ns := m.NodeStore()
-
- return &statsIter{
- iter: iter,
- kb: keyBuilder,
- vb: valueBuilder,
- ns: ns,
- schemaName: schemaName,
- planb: planbuilder.New(ctx, nil, nil, nil),
- }, nil
-}
-
-// statsIter reads histogram buckets into string-compatible types.
-// Values that are SQL rows should be converted with statsIter.ParseRow.
-// todo: make a JSON compatible container for sql.Row w/ types so that we
-// can eagerly convert to sql.Row without sacrificing string printing.
-type statsIter struct {
- iter prolly.MapIter
- kb, vb *val.TupleBuilder
- ns tree.NodeStore
- planb *planbuilder.Builder
- currentQual string
- schemaName string
- currentTypes []sql.Type
-}
-
-var _ sql.RowIter = (*statsIter)(nil)
-
-func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) {
- k, v, err := s.iter.Next(ctx)
- if err != nil {
- return nil, err
- }
-
- // deserialize K, V
- version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns)
- if err != nil {
- return nil, err
- }
- if version != schema.StatsVersion {
- return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion)
- }
-
- var row sql.Row
- for i := 0; i < s.kb.Desc.Count(); i++ {
- f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns)
- if err != nil {
- return nil, err
- }
- row = append(row, f)
- }
-
- for i := 0; i < s.vb.Desc.Count(); i++ {
- f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns)
- if err != nil {
- return nil, err
- }
- row = append(row, f)
- }
-
- dbName := row[schema.StatsDbTag].(string)
- tableName := row[schema.StatsTableTag].(string)
- indexName := row[schema.StatsIndexTag].(string)
- position := row[schema.StatsPositionTag].(int64)
- _ = row[schema.StatsVersionTag]
- commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
- rowCount := row[schema.StatsRowCountTag].(int64)
- distinctCount := row[schema.StatsDistinctCountTag].(int64)
- nullCount := row[schema.StatsNullCountTag].(int64)
- columnsStr := row[schema.StatsColumnsTag].(string)
- typesStr := row[schema.StatsTypesTag].(string)
- upperBoundStr := row[schema.StatsUpperBoundTag].(string)
- upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64)
- createdAt := row[schema.StatsCreatedAtTag].(time.Time)
-
- typs := strings.Split(typesStr, "\n")
- for i, t := range typs {
- typs[i] = strings.TrimSpace(t)
- }
-
- qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName)
- if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) {
- s.currentQual = curQual
- s.currentTypes, err = parseTypeStrings(typs)
- if err != nil {
- return nil, err
- }
- }
-
- mcvCountsStr := row[schema.StatsMcvCountsTag].(string)
-
- numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
- mcvs := make([]string, numMcvs)
- for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
- if v != nil {
- mcvs[i] = v.(string)
- }
- }
-
- return sql.Row{
- dbName,
- tableName,
- indexName,
- int(position),
- version,
- commit.String(),
- uint64(rowCount),
- uint64(distinctCount),
- uint64(nullCount),
- columnsStr,
- typesStr,
- upperBoundStr,
- uint64(upperBoundCnt),
- createdAt,
- mcvs[0], mcvs[1], mcvs[2], mcvs[3],
- mcvCountsStr,
- }, nil
-}
-
-func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) {
- var row sql.Row
- for i, v := range strings.Split(rowStr, ",") {
- val, _, err := s.currentTypes[i].Convert(v)
- if err != nil {
- return nil, err
- }
- row = append(row, val)
- }
- return row, nil
-}
-
-func (s *statsIter) Close(context *sql.Context) error {
- return nil
-}
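
statsIter re-parses column types only when the qualifier changes, caching them across consecutive buckets of the same index. The memoize-on-key-change pattern in isolation (case-sensitive here for brevity; the iterator compares qualifiers case-insensitively):

    import "github.com/dolthub/go-mysql-server/sql"

    // typeCache memoizes a parse step keyed on the current qualifier, as
    // statsIter does with currentQual/currentTypes.
    type typeCache struct {
    	qual  string
    	types []sql.Type
    }

    func (c *typeCache) lookup(qual string, raw []string, parse func([]string) ([]sql.Type, error)) ([]sql.Type, error) {
    	if qual == c.qual && c.types != nil {
    		return c.types, nil
    	}
    	typs, err := parse(raw)
    	if err != nil {
    		return nil, err
    	}
    	c.qual, c.types = qual, typs
    	return typs, nil
    }

In the deleted code, the parse argument is parseTypeStrings from load.go below.
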
diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go
deleted file mode 100644
index 72051260260..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/load.go
+++ /dev/null
@@ -1,308 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "errors"
- "fmt"
- "io"
- "strconv"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/planbuilder"
- "github.com/dolthub/go-mysql-server/sql/stats"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) {
- qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats)
- schemaName := db.SchemaName()
- iter, err := NewStatsIter(ctx, schemaName, m)
- if err != nil {
- return nil, err
- }
- currentStat := statspro.NewDoltStats()
- invalidTables := make(map[string]bool)
- for {
- row, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return nil, err
- }
-
- // deserialize K, V
- dbName := row[schema.StatsDbTag].(string)
- tableName := row[schema.StatsTableTag].(string)
- indexName := row[schema.StatsIndexTag].(string)
- _ = row[schema.StatsVersionTag]
- commit := hash.Parse(row[schema.StatsCommitHashTag].(string))
- rowCount := row[schema.StatsRowCountTag].(uint64)
- distinctCount := row[schema.StatsDistinctCountTag].(uint64)
- nullCount := row[schema.StatsNullCountTag].(uint64)
- columns := strings.Split(row[schema.StatsColumnsTag].(string), ",")
- typesStr := row[schema.StatsTypesTag].(string)
- boundRowStr := row[schema.StatsUpperBoundTag].(string)
- upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64)
- createdAt := row[schema.StatsCreatedAtTag].(time.Time)
-
- typs := strings.Split(typesStr, "\n")
- for i, t := range typs {
- typs[i] = strings.TrimSpace(t)
- }
-
- qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName)
- if _, ok := invalidTables[tableName]; ok {
- continue
- }
-
- if currentStat.Statistic.Qual.String() != qual.String() {
- if !currentStat.Statistic.Qual.Empty() {
- currentStat.UpdateActive()
- qualToStats[currentStat.Statistic.Qual] = currentStat
- }
-
- currentStat = statspro.NewDoltStats()
-
- tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
- if ok {
- currentStat.Statistic.Qual = qual
- currentStat.Statistic.Cols = columns
- currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns()))
- if err != nil {
- return nil, err
- }
- } else if !ok {
- ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName)
- invalidTables[qual.Table()] = true
- continue
- } else if err != nil {
- return nil, err
- }
- }
-
- numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag
-
- mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",")
- mcvCnts := make([]uint64, numMcvs)
- for i, v := range mcvCountsStr {
- if v == "" {
- continue
- }
- val, err := strconv.Atoi(v)
- if err != nil {
- return nil, err
- }
- mcvCnts[i] = uint64(val)
- }
-
- mcvs := make([]sql.Row, numMcvs)
- for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] {
- if v != nil && v != "" {
- row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb)
- if err != nil {
- return nil, err
- }
- mcvs[i] = row
- }
- }
-
- for i, v := range mcvCnts {
- if v == 0 {
- mcvs = mcvs[:i]
- mcvCnts = mcvCnts[:i]
- break
- }
- }
-
- if currentStat.Statistic.Hist == nil {
- currentStat.Statistic.Typs, err = parseTypeStrings(typs)
- if err != nil {
- return nil, err
- }
- currentStat.Statistic.Qual = qual
- }
-
- boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb)
- if err != nil {
- return nil, err
- }
-
- bucket := statspro.DoltBucket{
- Chunk: commit,
- Created: createdAt,
- Bucket: &stats.Bucket{
- RowCnt: uint64(rowCount),
- DistinctCnt: uint64(distinctCount),
- NullCnt: uint64(nullCount),
- McvVals: mcvs,
- McvsCnt: mcvCnts,
- BoundCnt: upperBoundCnt,
- BoundVal: boundRow,
- },
- }
-
- currentStat.Hist = append(currentStat.Hist, bucket)
- currentStat.Statistic.RowCnt += uint64(rowCount)
- currentStat.Statistic.DistinctCnt += uint64(distinctCount)
- currentStat.Statistic.NullCnt += uint64(nullCount)
- if currentStat.Statistic.Created.Before(createdAt) {
- currentStat.Statistic.Created = createdAt
- }
- }
- if !currentStat.Qualifier().Empty() {
- currentStat.UpdateActive()
- qualToStats[currentStat.Statistic.Qual] = currentStat
- }
- return qualToStats, nil
-}
-
-func parseTypeStrings(typs []string) ([]sql.Type, error) {
- var ret []sql.Type
- for _, typ := range typs {
- ct, err := planbuilder.ParseColumnTypeString(typ)
- if err != nil {
- return nil, err
- }
- ret = append(ret, ct)
- }
- return ret, nil
-}
-
-func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) {
- root, err := db.GetRoot(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- iat, ok := sqlTable.(sql.IndexAddressable)
- if !ok {
- return nil, nil, nil, sql.ColSet{}, nil
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- var sqlIdx sql.Index
- for _, i := range indexes {
- if strings.EqualFold(i.ID(), qual.Index()) {
- sqlIdx = i
- break
- }
- }
-
- if sqlIdx == nil {
- return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
- }
-
- fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()})
- if !ok {
- return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table())
- }
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- var idx durable.Index
- if qual.Index() == "primary" {
- idx, err = table.GetRowData(ctx)
- } else {
- idx, err = table.GetIndexRowData(ctx, qual.Index())
- }
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
- keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols))
- buffPool := prollyMap.NodeStore().Pool()
-
- if cnt, err := prollyMap.Count(); err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- } else if cnt == 0 {
- return nil, keyBuilder, nil, sql.ColSet{}, nil
- }
- firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- keyBytes, _, err := firstIter.Next(ctx)
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- for i := range keyBuilder.Desc.Types {
- keyBuilder.PutRaw(i, keyBytes.GetField(i))
- }
-
- firstKey := keyBuilder.Build(buffPool)
- firstRow := make(sql.Row, keyBuilder.Desc.Count())
- for i := 0; i < keyBuilder.Desc.Count(); i++ {
- firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
- if err != nil {
- return nil, nil, nil, sql.ColSet{}, err
- }
- }
- return firstRow, keyBuilder, fds, colset, nil
-}
-
-func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) {
- tab, ok, err := db.GetTableInsensitive(ctx, qual.Table())
- if err != nil {
- return nil, sql.ColSet{}, err
- } else if !ok {
- return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table())
- }
-
- iat, ok := tab.(sql.IndexAddressable)
- if !ok {
- return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table())
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return nil, sql.ColSet{}, err
- }
-
- var idx sql.Index
- for _, i := range indexes {
- if strings.EqualFold(i.ID(), qual.Index()) {
- idx = i
- break
- }
- }
-
- if idx == nil {
- return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index())
- }
-
- return stats.IndexFds(qual.Table(), tab.Schema(), idx)
-}
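
loadStats above depends on the stats map iterating in qualifier order: buckets accumulate into the current DoltStats and are flushed each time the qualifier changes, with one more flush after EOF. The change-of-key grouping as a generic sketch:

    // groupSorted groups consecutive values sharing a key from a key-ordered
    // stream; next reports ok=false at end of stream. Note the trailing
    // flush after the loop, matching the post-loop flush in loadStats.
    func groupSorted[K comparable, V any](next func() (K, V, bool)) map[K][]V {
    	out := make(map[K][]V)
    	var curKey K
    	var cur []V
    	started := false
    	for {
    		k, v, ok := next()
    		if !ok {
    			break
    		}
    		if started && k != curKey {
    			out[curKey] = cur
    			cur = nil
    		}
    		curKey, started = k, true
    		cur = append(cur, v)
    	}
    	if started {
    		out[curKey] = cur
    	}
    	return out
    }
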
diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go
deleted file mode 100644
index c23e1d93dc8..00000000000
--- a/go/libraries/doltcore/sqle/statsnoms/write.go
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statsnoms
-
-import (
- "context"
- "errors"
- "io"
- "strings"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/stats"
- "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/schema"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro"
- "github.com/dolthub/dolt/go/store/prolly"
- "github.com/dolthub/dolt/go/store/prolly/tree"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-// About ~200 20-byte addresses fit in a ~4k chunk. Chunk sizes
-// are approximate, but certainly shouldn't reach the square
-// of the expected size.
-const maxBucketFanout = 200 * 200
-
-var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64}
-
-func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if err := deleteIndexRows(ctx, statsMap, dStats); err != nil {
- return err
- }
- return putIndexRows(ctx, statsMap, dStats)
-}
-
-func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- sch := schema.StatsTableDoltSchema
- kd, _ := sch.GetMapDescriptors()
-
- keyBuilder := val.NewTupleBuilder(kd)
-
- qual := dStats.Qualifier()
- pool := statsMap.NodeStore().Pool()
-
- // delete previous entries for this index -> (db, table, index, pos)
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Table())
- keyBuilder.PutString(2, qual.Index())
- keyBuilder.PutInt64(3, 0)
- firstKey := keyBuilder.Build(pool)
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Table())
- keyBuilder.PutString(2, qual.Index())
- keyBuilder.PutInt64(3, maxBucketFanout+1)
- maxKey := keyBuilder.Build(pool)
-
- // there is a limit on the number of buckets for a given index, iter
- // will terminate before maxBucketFanout
- iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey)
- if err != nil {
- return err
- }
-
- for {
- k, _, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return err
- }
- err = statsMap.Put(ctx, k, nil)
- if err != nil {
- return err
- }
- }
- return nil
-}
-
-func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error {
- if ctx.Err() != nil {
- return ctx.Err()
- }
- sch := schema.StatsTableDoltSchema
- kd, vd := sch.GetMapDescriptors()
-
- keyBuilder := val.NewTupleBuilder(kd)
- valueBuilder := val.NewTupleBuilder(vd)
-
- qual := dStats.Qualifier()
- pool := statsMap.NodeStore().Pool()
-
- // now add new buckets
- typesB := strings.Builder{}
- sep := ""
- for _, t := range dStats.Statistic.Typs {
- typesB.WriteString(sep + t.String())
- sep = "\n"
- }
- typesStr := typesB.String()
-
- var pos int64
- for _, h := range dStats.Hist {
- keyBuilder.PutString(0, qual.Database)
- keyBuilder.PutString(1, qual.Tab)
- keyBuilder.PutString(2, qual.Idx)
- keyBuilder.PutInt64(3, pos)
-
- valueBuilder.PutInt64(0, schema.StatsVersion)
- valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String())
- valueBuilder.PutInt64(2, int64(h.RowCount()))
- valueBuilder.PutInt64(3, int64(h.DistinctCount()))
- valueBuilder.PutInt64(4, int64(h.NullCount()))
- valueBuilder.PutString(5, strings.Join(dStats.Columns(), ","))
- valueBuilder.PutString(6, typesStr)
- boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb)
- if err != nil {
- return err
- }
- valueBuilder.PutString(7, string(boundRow))
- valueBuilder.PutInt64(8, int64(h.BoundCount()))
- valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h))
- for i, r := range h.Mcvs() {
- mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb)
- if err != nil {
- return err
- }
- valueBuilder.PutString(10+i, string(mcvRow))
- }
- var mcvCntsRow sql.Row
- for _, v := range h.McvCounts() {
- mcvCntsRow = append(mcvCntsRow, int(v))
- }
- valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes))
-
- key := keyBuilder.Build(pool)
- value := valueBuilder.Build(pool)
- statsMap.Put(ctx, key, value)
- pos++
- }
- return nil
-}
-
-func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
- for i, v := range r {
- if v == nil {
- continue
- }
- if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
- return nil, err
- }
- }
- return tb.Build(ns.Pool()), nil
-}
-
-func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
- tup := []byte(s)
- r := make(sql.Row, tb.Desc.Count())
- var err error
- for i, _ := range r {
- r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
- if err != nil {
- return nil, err
- }
- }
- return r, nil
-}
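
The maxBucketFanout bound above comes from chunk-size arithmetic; spelled out:

    // A ~4KB chunk holds roughly 4096/20 ≈ 204 twenty-byte addresses,
    // rounded down to 200, so a two-level address tree tops out near
    // 200*200 buckets. The delete pass therefore scans keys
    // (db, table, index, 0) through (db, table, index, maxBucketFanout+1)
    // to cover every bucket an index can have.
    const (
    	chunkBytes      = 4096
    	addrBytes       = 20
    	addrsPerChunk   = chunkBytes / addrBytes // 204
    	maxBucketFanout = 200 * 200              // 40,000
    )
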
diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go
deleted file mode 100644
index faa1869315c..00000000000
--- a/go/libraries/doltcore/sqle/statspro/analyze.go
+++ /dev/null
@@ -1,343 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "fmt"
- "strings"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly/tree"
-)
-
-const (
- boostrapRowLimit = 2e6
-)
-
-func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return err
- }
- return p.RefreshTableStatsWithBranch(ctx, table, db, branch)
-}
-
-func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error {
- dSess := dsess.DSessFromSess(ctx.Session)
- branches := p.getStatsBranches(ctx)
- var rows uint64
- for _, branch := range branches {
- sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
- if err != nil {
- if sql.ErrDatabaseNotFound.Is(err) {
- // default branch is not valid
- continue
- }
- return err
- }
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
- for _, table := range tables {
- sqlTable, _, err := GetLatestTable(ctx, table, sqlDb)
- if err != nil {
- return err
- }
-
- if st, ok := sqlTable.(sql.StatisticsTable); ok {
- cnt, ok, err := st.RowCount(ctx)
- if ok && err == nil {
- rows += cnt
- }
- }
- if rows >= boostrapRowLimit {
- return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE
\" or \"call dolt_stats_restart()\" to collect statistics", db)
- }
-
- if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error {
- if !p.TryLockForUpdate(branch, db, table.Name()) {
- return fmt.Errorf("already updating statistics")
- }
- defer p.UnlockTable(branch, db, table.Name())
-
- dSess := dsess.DSessFromSess(ctx.Session)
-
- sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch))
- if err != nil {
- return err
- }
-
- // lock only after accessing DatabaseProvider
-
- tableName := strings.ToLower(table.Name())
- dbName := strings.ToLower(db)
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- iat, ok := table.(sql.IndexAddressableTable)
- if !ok {
- return nil
- }
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return err
- }
-
- // it's important to update WORKING session references every call
- sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb)
- if err != nil {
- return err
- }
-
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- // if the stats database does not exist, initialize one
- fs, err := p.pro.FileSystemForDatabase(dbName)
- if err != nil {
- return err
- }
- sourceDb, ok := p.pro.BaseDatabase(ctx, dbName)
- if !ok {
- return sql.ErrDatabaseNotFound.New(dbName)
- }
- statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.Warn(0, err.Error())
- return nil
- }
- p.setStatDb(dbName, statDb)
- }
-
- schHash, err := dTab.GetSchemaHash(ctx)
- if err != nil {
- return err
- }
-
- if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() {
- if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
- return fmt.Errorf("set schema hash error: %w", err)
- }
- } else if oldSchHash != schHash {
- ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
- if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil {
- return err
- }
-
- stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName)
- if err != nil {
- return err
- }
- for _, stat := range stats {
- statDb.DeleteStats(ctx, branch, stat.Qualifier())
- }
- } else if err != nil {
- return err
- }
-
- tablePrefix := fmt.Sprintf("%s.", tableName)
- var idxMetas []indexMeta
- for _, idx := range indexes {
- cols := make([]string, len(idx.Expressions()))
- for i, c := range idx.Expressions() {
- cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
- }
-
- qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID()))
- curStat, ok := statDb.GetStat(branch, qual)
- if !ok {
- curStat = NewDoltStats()
- curStat.Statistic.Qual = qual
- }
- idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols)
- if err != nil {
- return err
- }
- idxMetas = append(idxMetas, idxMeta)
- }
-
- newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
- if err != nil {
- return err
- }
-
- // merge new chunks with preexisting chunks
- for _, idxMeta := range idxMetas {
- stat := newTableStats[idxMeta.qual]
- targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist)
- if err != nil {
- return err
- }
- if targetChunks == nil {
- // empty table
- continue
- }
- stat.SetChunks(idxMeta.allAddrs)
- stat.Hist = targetChunks
- stat.UpdateActive()
- if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil {
- return err
- }
- }
-
- p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
- return statDb.Flush(ctx, branch)
-}
-
-// BranchQualifiedDatabase returns a branch-qualified database name. If the
-// name already carries the branch suffix, it is returned unchanged.
-func BranchQualifiedDatabase(db, branch string) string {
- suffix := fmt.Sprintf("/%s", branch)
- if !strings.HasSuffix(db, suffix) {
- return fmt.Sprintf("%s%s", db, suffix)
- }
- return db
-}
-
-// GetLatestTable will get the WORKING root table for the current database/branch
-func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) {
- var db sqle.Database
- switch d := sqlDb.(type) {
- case sqle.Database:
- db = d
- case sqle.ReadReplicaDatabase:
- db = d.Database
- default:
- return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
- }
- sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
- if err != nil {
- return nil, nil, err
- }
- if !ok {
- return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
- }
-
- var dTab *doltdb.Table
- switch t := sqlTable.(type) {
- case *sqle.AlterableDoltTable:
- dTab, err = t.DoltTable.DoltTable(ctx)
- case *sqle.WritableDoltTable:
- dTab, err = t.DoltTable.DoltTable(ctx)
- case *sqle.DoltTable:
- dTab, err = t.DoltTable(ctx)
- default:
- err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
- }
- if err != nil {
- return nil, nil, err
- }
- return sqlTable, dTab, nil
-}
-
-func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) {
- var idx durable.Index
- var err error
- if strings.EqualFold(sqlIndex.ID(), "PRIMARY") {
- idx, err = doltTable.GetRowData(ctx)
- } else {
- idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID())
- }
- if err != nil {
- return indexMeta{}, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
-
- if cnt, err := prollyMap.Count(); err != nil {
- return indexMeta{}, err
- } else if cnt == 0 {
- return indexMeta{
- qual: curStats.Statistic.Qual,
- cols: cols,
- }, nil
- }
-
- // get newest histogram target level hashes
- levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
- if err != nil {
- return indexMeta{}, err
- }
-
- var addrs []hash.Hash
- var keepChunks []sql.HistogramBucket
- var missingAddrs float64
- var missingChunks []tree.Node
- var missingOffsets []updateOrdinal
- var offset uint64
-
- for _, n := range levelNodes {
- // Compare the previous histogram chunks to the newest tree chunks.
- // Partition the newest chunks into 1) preserved or 2) missing.
- // Missing chunks will need to be scanned on a stats update, so
- // track the (start, end) ordinal offsets to simplify the read iter.
- treeCnt, err := n.TreeCount()
- if err != nil {
- return indexMeta{}, err
- }
-
- addrs = append(addrs, n.HashOf())
- if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok {
- missingChunks = append(missingChunks, n)
- missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)})
- missingAddrs++
- } else {
- keepChunks = append(keepChunks, curStats.Hist[bucketIdx])
- }
- offset += uint64(treeCnt)
- }
-
- var dropChunks []sql.HistogramBucket
- for _, h := range curStats.Chunks {
- var match bool
- for _, b := range keepChunks {
- if DoltBucketChunk(b) == h {
- match = true
- break
- }
- }
- if !match {
- dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]])
- }
- }
-
- return indexMeta{
- qual: curStats.Statistic.Qual,
- cols: cols,
- newNodes: missingChunks,
- updateOrdinals: missingOffsets,
- keepChunks: keepChunks,
- dropChunks: dropChunks,
- allAddrs: addrs,
- }, nil
-}
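
BranchQualifiedDatabase above is idempotent on already-suffixed names. A quick standalone check (the helper is restated locally for illustration, since the original is package-internal):

    package main

    import (
    	"fmt"
    	"strings"
    )

    // branchQualifiedDatabase restates the deleted helper above.
    func branchQualifiedDatabase(db, branch string) string {
    	suffix := "/" + branch
    	if !strings.HasSuffix(db, suffix) {
    		return db + suffix
    	}
    	return db
    }

    func main() {
    	fmt.Println(branchQualifiedDatabase("mydb", "main"))      // mydb/main
    	fmt.Println(branchQualifiedDatabase("mydb/main", "main")) // mydb/main
    }
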
diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go
deleted file mode 100644
index 3322065f809..00000000000
--- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- types2 "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
-)
-
-const asyncAutoRefreshStats = "async_auto_refresh_stats"
-
-func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error {
- _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
- _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
- interval64, _, _ := types2.Int64.Convert(interval)
- intervalSec := time.Second * time.Duration(interval64.(int64))
- thresholdf64 := threshold.(float64)
-
- ctx, err := ctxFactory(context.Background())
- if err != nil {
- return err
- }
-
- branches := p.getStatsBranches(ctx)
-
- return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches)
-}
-
-func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error {
- // this is only called after initial statistics are finished loading
- // launch a thread that periodically checks freshness
-
- p.mu.Lock()
- defer p.mu.Unlock()
-
- dropDbCtx, dbStatsCancel := context.WithCancel(context.Background())
- p.autoCtxCancelers[dbName] = dbStatsCancel
-
- return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) {
- ticker := time.NewTicker(checkInterval + time.Nanosecond)
- for {
- select {
- case <-ctx.Done():
- ticker.Stop()
- return
- case <-ticker.C:
- select {
- case <-dropDbCtx.Done():
- ticker.Stop()
- return
- default:
- }
-
- sqlCtx, err := ctxFactory(ctx)
- if err != nil {
- return
- }
-
- dSess := dsess.DSessFromSess(sqlCtx.Session)
- ddb, ok := dSess.GetDoltDB(sqlCtx, dbName)
- if !ok {
- sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName)
- return
- }
- for _, branch := range branches {
- if br, ok, err := ddb.HasBranch(ctx, branch); ok {
- sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String())
- // update WORKING session references
- sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch))
- if err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- return
- }
-
- if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- return
- }
- } else if err != nil {
- sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error())
- } else {
- sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br)
- }
- }
- }
- }
- })
-}
-
-func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error {
- if !p.TryLockForUpdate(branch, dbName, "") {
- return fmt.Errorf("database already being updated: %s/%s", branch, dbName)
- }
- defer p.UnlockTable(branch, dbName, "")
-
- // Iterate all dbs, tables, indexes. Each db will collect
- // []indexMeta above refresh threshold. We read and process those
- // chunks' statistics. We merge updated chunks with precomputed
- // chunks. The full set of statistics for each database lands
- // 1) in the provider's most recent set of database statistics, and
- // 2) on disk in the database's statistics ref'd prolly.Map.
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- return sql.ErrDatabaseNotFound.New(dbName)
- }
-
- var deletedStats []sql.StatQualifier
- qualExists := make(map[sql.StatQualifier]bool)
- tableExistsAndSkipped := make(map[string]bool)
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- for _, table := range tables {
- if !p.TryLockForUpdate(branch, dbName, table) {
- ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table)
- return fmt.Errorf("table already being updated: %s", table)
- }
- defer p.UnlockTable(branch, dbName, table)
-
- sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb)
- if err != nil {
- return err
- }
-
- tableHash, err := dTab.GetRowDataHash(ctx)
- if err != nil {
- return err
- }
-
- if statDb.GetTableHash(branch, table) == tableHash {
- // no data changes since last check
- tableExistsAndSkipped[table] = true
- ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash)
- continue
- } else {
- ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash)
- }
-
- schHash, err := dTab.GetSchemaHash(ctx)
- if err != nil {
- return err
- }
-
- var schemaName string
- if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() {
- if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
- return err
- }
- } else if oldSchHash != schHash {
- ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch)
- if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil {
- return err
- }
- stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table)
- if err != nil {
- return err
- }
- for _, stat := range stats {
- statDb.DeleteStats(ctx, branch, stat.Qualifier())
- }
- } else if err != nil {
- return err
- }
-
- iat, ok := sqlTable.(sql.IndexAddressableTable)
- if !ok {
- return fmt.Errorf("table does not support indexes %s", table)
- }
-
- indexes, err := iat.GetIndexes(ctx)
- if err != nil {
- return err
- }
-
- // collect indexes and ranges to be updated
- var idxMetas []indexMeta
- for _, index := range indexes {
- qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID()))
- qualExists[qual] = true
- curStat, ok := statDb.GetStat(branch, qual)
- if !ok {
- curStat = NewDoltStats()
- curStat.Statistic.Qual = qual
-
- cols := make([]string, len(index.Expressions()))
- tablePrefix := fmt.Sprintf("%s.", table)
- for i, c := range index.Expressions() {
- cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
- }
- curStat.Statistic.Cols = cols
- }
- ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String())
-
- updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns())
- if err != nil {
- ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error())
- continue
- }
- curCnt := float64(len(curStat.Active))
- updateCnt := float64(len(updateMeta.newNodes))
- deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks))
- ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt))
-
- if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh {
- if curCnt == 0 && updateCnt == 0 {
- continue
- }
- ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual)
- // mark index for updating
- idxMetas = append(idxMetas, updateMeta)
- // update latest hash if we haven't already
- statDb.SetTableHash(branch, table, tableHash)
- }
- }
-
- // get new buckets for index chunks to update
- newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas)
- if err != nil {
- return err
- }
-
- // merge new chunks with preexisting chunks
- for _, updateMeta := range idxMetas {
- stat := newTableStats[updateMeta.qual]
- if stat != nil {
- var err error
- if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok {
- err = statDb.SetStat(ctx, branch, updateMeta.qual, stat)
- } else {
- err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist)
- }
- if err != nil {
- return err
- }
- p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName))
- }
- }
- }
-
- for _, q := range statDb.ListStatQuals(branch) {
- // table or index delete leaves hole in stats
- // this is separate from threshold check
- if !tableExistsAndSkipped[q.Table()] && !qualExists[q] {
- // only delete stats we've verified are deleted
- deletedStats = append(deletedStats, q)
- }
- }
-
- statDb.DeleteStats(ctx, branch, deletedStats...)
-
- if err := statDb.Flush(ctx, branch); err != nil {
- return err
- }
-
- return nil
-}
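
checkRefresh gates each index update on the fraction of changed chunks. Isolated, the rule is:

    // needsRefresh mirrors the deleted threshold check: refresh when more
    // than |thresh| of an index's tracked chunks changed, or when an index
    // with data was never collected; indexes empty on both sides are
    // skipped.
    func needsRefresh(curCnt, updateCnt, deleteCnt, thresh float64) bool {
    	if curCnt == 0 {
    		return updateCnt > 0
    	}
    	return (deleteCnt+updateCnt)/curCnt > thresh
    }

For example, with 100 tracked chunks, 10 new and 15 dropped buckets give (15+10)/100 = 0.25, which trips a 0.20 threshold.
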
diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/bucket_builder.go
similarity index 52%
rename from go/libraries/doltcore/sqle/statspro/update.go
rename to go/libraries/doltcore/sqle/statspro/bucket_builder.go
index 562e82c5679..2c974223f84 100644
--- a/go/libraries/doltcore/sqle/statspro/update.go
+++ b/go/libraries/doltcore/sqle/statspro/bucket_builder.go
@@ -17,19 +17,11 @@ package statspro
import (
"container/heap"
"context"
- "errors"
- "fmt"
- "io"
"sort"
- "strings"
- "time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
- "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
- "github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
@@ -40,153 +32,7 @@ const (
mcvCnt = 3
)
-// createNewStatsBuckets builds histograms for a list of index statistic metadata.
-// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If
-// the returned buckets are a subset of the index, the caller is responsible
-// for reconciling the difference.
-func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) {
- nameToIdx := make(map[string]sql.Index)
- for _, idx := range indexes {
- nameToIdx[strings.ToLower(idx.ID())] = idx
- }
-
- ret := make(map[sql.StatQualifier]*DoltStats)
-
- for _, meta := range idxMetas {
- var idx durable.Index
- var err error
- if strings.EqualFold(meta.qual.Index(), "PRIMARY") {
- idx, err = dTab.GetRowData(ctx)
- } else {
- idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index())
- }
- if err != nil {
- return nil, err
- }
-
- prollyMap := durable.ProllyMapFromIndex(idx)
- keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc())
-
- sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())]
- fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx)
- if err != nil {
- return nil, err
- }
-
- var types []sql.Type
- for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() {
- types = append(types, cet.Type)
- }
-
- if cnt, err := prollyMap.Count(); err != nil {
- return nil, err
- } else if cnt == 0 {
- // table is empty
- ret[meta.qual] = NewDoltStats()
- ret[meta.qual].Statistic.Created = time.Now()
- ret[meta.qual].Statistic.Cols = meta.cols
- ret[meta.qual].Statistic.Typs = types
- ret[meta.qual].Statistic.Qual = meta.qual
-
- ret[meta.qual].Statistic.Fds = fds
- ret[meta.qual].Statistic.Colset = colSet
- ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
-
- continue
- }
-
- firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols))
- if err != nil {
- return nil, err
- }
-
- updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc())
- ret[meta.qual] = NewDoltStats()
- ret[meta.qual].Chunks = meta.allAddrs
- ret[meta.qual].Statistic.Created = time.Now()
- ret[meta.qual].Statistic.Cols = meta.cols
- ret[meta.qual].Statistic.Typs = types
- ret[meta.qual].Statistic.Qual = meta.qual
- ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols)))
-
- var start, stop uint64
- // read leaf rows for each bucket
- for i, chunk := range meta.newNodes {
- // each node is a bucket
- updater.newBucket()
-
- // we read exclusive range [node first key, next node first key)
- start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop
- iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
- if err != nil {
- return nil, err
- }
- for {
- // stats key will be a prefix of the index key
- keyBytes, _, err := iter.Next(ctx)
- if errors.Is(err, io.EOF) {
- break
- } else if err != nil {
- return nil, err
- }
- // build full key
- for i := range keyBuilder.Desc.Types {
- keyBuilder.PutRaw(i, keyBytes.GetField(i))
- }
-
- updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
- keyBuilder.Recycle()
- }
-
- // finalize the aggregation
- bucket, err := updater.finalize(ctx, prollyMap.NodeStore())
- if err != nil {
- return nil, err
- }
- bucket.Chunk = chunk.HashOf()
- ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket)
- }
-
- ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct)
- ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount)
- ret[updater.qual].Statistic.LowerBnd = firstRow
- ret[updater.qual].Statistic.Fds = fds
- ret[updater.qual].Statistic.Colset = colSet
- ret[updater.qual].UpdateActive()
- }
- return ret, nil
-}
-
-// MergeNewChunks combines a set of old and new chunks to create
-// the desired target histogram. Undefined behavior if a |targetHash|
-// does not exist in either |oldChunks| or |newChunks|.
-func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) {
- hashToPos := make(map[hash.Hash]int, len(inputHashes))
- for i, h := range inputHashes {
- hashToPos[h] = i
- }
-
- var cnt int
- targetBuckets := make([]sql.HistogramBucket, len(inputHashes))
- for _, c := range oldChunks {
- if idx, ok := hashToPos[DoltBucketChunk(c)]; ok {
- cnt++
- targetBuckets[idx] = c
- }
- }
- for _, c := range newChunks {
- if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil {
- cnt++
- targetBuckets[idx] = c
- }
- }
- if cnt != len(inputHashes) {
- return nil, fmt.Errorf("encountered invalid statistic chunks")
- }
- return targetBuckets, nil
-}
-
-func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) {
+func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) {
if cnt, err := prollyMap.Count(); err != nil {
return nil, err
} else if cnt == 0 {
@@ -208,9 +54,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu
keyBuilder.PutRaw(i, keyBytes.GetField(i))
}
- firstKey := keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen)
- firstRow := make(sql.Row, prefixLen)
- for i := 0; i < prefixLen; i++ {
+ firstKey := keyBuilder.Build(buffPool)
+ firstRow := make(sql.Row, firstKey.Count())
+ for i := range firstRow {
firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore())
if err != nil {
return nil, err
@@ -266,7 +112,7 @@ func (u *bucketBuilder) newBucket() {
// finalize converts the current aggregation stats into a histogram bucket,
// which includes deserializing most common value tuples into sql.Rows.
-func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) {
+func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) {
// update MCV in case we've ended on a run of many identical keys
u.updateMcv()
@@ -276,27 +122,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu
// convert the MCV tuples into SQL rows (most efficient to only do this once)
mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen)
if err != nil {
- return DoltBucket{}, err
+ return nil, err
}
upperBound := make(sql.Row, u.prefixLen)
if u.currentKey != nil {
for i := 0; i < u.prefixLen; i++ {
upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns)
if err != nil {
- return DoltBucket{}, err
+ return nil, err
}
}
}
- return DoltBucket{
- Bucket: &stats.Bucket{
- RowCnt: uint64(u.count),
- DistinctCnt: uint64(u.distinct),
- BoundCnt: uint64(u.currentCnt),
- McvVals: mcvRows,
- McvsCnt: u.mcvs.Counts(),
- BoundVal: upperBound,
- NullCnt: uint64(u.nulls),
- },
+ return &stats.Bucket{
+ RowCnt: uint64(u.count),
+ DistinctCnt: uint64(u.distinct),
+ BoundCnt: uint64(u.currentCnt),
+ McvVals: mcvRows,
+ McvsCnt: u.mcvs.Counts(),
+ BoundVal: upperBound,
+ NullCnt: uint64(u.nulls),
}, nil
}
diff --git a/go/libraries/doltcore/sqle/statspro/update_test.go b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
similarity index 92%
rename from go/libraries/doltcore/sqle/statspro/update_test.go
rename to go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
index ef670e19c8b..e97ad343755 100644
--- a/go/libraries/doltcore/sqle/statspro/update_test.go
+++ b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go
@@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) {
name string
keys []sql.Row
keyDesc val.TupleDesc
- bucket DoltBucket
+ bucket *stats.Bucket
}{
{
name: "ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 5,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
// technically nulls should be at beginning
name: "ints with middle nulls",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 16,
DistinctCnt: 6,
NullCnt: 3,
@@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
name: "ints with beginning nulls",
keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 6,
NullCnt: 2,
@@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) {
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(5)},
BoundCnt: 2,
- }},
+ },
},
{
name: "more ints",
keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 22,
DistinctCnt: 7,
BoundCnt: 1,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(7)},
- }},
+ },
},
{
name: "2-ints",
keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 11,
McvVals: []sql.Row{{int64(4), int64(1)}},
McvsCnt: []uint64{3},
BoundVal: sql.Row{int64(5), int64(2)},
BoundCnt: 1,
- }},
+ },
},
{
name: "2-ints with nulls",
keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 5,
DistinctCnt: 5,
NullCnt: 3,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{int64(2), int64(2)},
- BoundCnt: 1},
+ BoundCnt: 1,
},
},
{
name: "varchars",
keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 9,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i"},
BoundCnt: 2,
- }},
+ },
},
{
name: "varchar-ints",
keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 15,
DistinctCnt: 12,
McvVals: []sql.Row{},
McvsCnt: []uint64{},
BoundVal: sql.Row{"i", int64(1)},
BoundCnt: 2,
- }},
+ },
},
{
name: "mcvs",
keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}},
keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}),
- bucket: DoltBucket{Bucket: &stats.Bucket{
+ bucket: &stats.Bucket{
RowCnt: 23,
DistinctCnt: 18,
McvVals: []sql.Row{{int64(10)}, {int64(7)}},
McvsCnt: []uint64{3, 4},
BoundVal: sql.Row{int64(22)},
BoundCnt: 1,
- }},
+ },
},
}
diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go
deleted file mode 100644
index f8492a08b61..00000000000
--- a/go/libraries/doltcore/sqle/statspro/configure.go
+++ /dev/null
@@ -1,158 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "strings"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- types2 "github.com/dolthub/go-mysql-server/sql/types"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
-)
-
-var helpMsg = "call dolt_stats_purge() to reset statistics"
-
-func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error {
- p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
-
- if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) {
- return nil
- }
-
- loadCtx, err := ctxFactory(ctx)
- if err != nil {
- return err
- }
-
- branches := p.getStatsBranches(loadCtx)
-
- var autoEnabled bool
- var startupEnabled bool
- var intervalSec time.Duration
- var thresholdf64 float64
- if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) {
- autoEnabled = true
- _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold)
- _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval)
- interval64, _, _ := types2.Int64.Convert(interval)
- intervalSec = time.Second * time.Duration(interval64.(int64))
- thresholdf64 = threshold.(float64)
-
- p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads))
- p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...)
- } else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) {
- startupEnabled = true
- }
-
- eg, ctx := loadCtx.NewErrgroup()
- for _, db := range dbs {
- // copy closure variables
- db := db
- eg.Go(func() (err error) {
- defer func() {
- if r := recover(); r != nil {
- if str, ok := r.(fmt.Stringer); ok {
- err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String())
- } else {
- err = fmt.Errorf("%w: %v", ErrFailedToLoad, r)
- }
- return
- }
- }()
-
- fs, err := p.pro.FileSystemForDatabase(db.Name())
- if err != nil {
- return err
- }
-
- if p.Load(loadCtx, fs, db, branches); err != nil {
- return err
- }
- if autoEnabled {
- return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches)
- } else if startupEnabled {
- if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil {
- return err
- }
- }
- return nil
- })
- }
- return eg.Wait()
-}
-
-// getStatsBranches returns the set of branches whose statistics are tracked.
-// The order of precedence is (1) global variable, (2) session current branch,
-// (3) engine default branch.
-func (p *Provider) getStatsBranches(ctx *sql.Context) []string {
- dSess := dsess.DSessFromSess(ctx.Session)
- var branches []string
- if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" {
- defaultBranch, _ := dSess.GetBranch()
- if defaultBranch != "" {
- branches = append(branches, defaultBranch)
- }
- } else {
- for _, branch := range strings.Split(bs.(string), ",") {
- branches = append(branches, strings.TrimSpace(branch))
- }
- }
-
- if branches == nil {
- branches = append(branches, p.pro.DefaultBranch())
- }
- return branches
-}
-
-func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error {
- if statDb, ok := p.getStatDb(db); ok {
- return statDb.LoadBranchStats(ctx, branch)
- }
- return nil
-}
-
-// Load scans the statistics tables, populating the |stats| attribute.
-// Statistics are not available for reading until we've finished loading.
-func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) {
- // |statPath| is either file://./stat or mem://stat
- statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- return
- }
-
- for _, branch := range branches {
- if err = statsDb.LoadBranchStats(ctx, branch); err != nil {
- // if branch name is invalid, continue loading rest
- // TODO: differentiate bad branch name from other errors
- ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- continue
- }
- if err := statsDb.Flush(ctx, branch); err != nil {
- ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg)
- continue
- }
- }
-
- p.setStatDb(strings.ToLower(db.Name()), statsDb)
- return
-}
diff --git a/go/libraries/doltcore/sqle/statspro/doc.go b/go/libraries/doltcore/sqle/statspro/doc.go
new file mode 100644
index 00000000000..51c1cdbbd0b
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/doc.go
@@ -0,0 +1,81 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+// Package statspro provides an event loop that manages table statistics
+// collection and access.
+//
+// At any given time there is one thread responsible for pulling work
+// from the job queue to execute. The thread has exclusive ownership
+// over the job channel.
+//
+// All stats are persisted within a single database. If there are multiple
+// databases, one is selected at random as the storage target. If multiple
+// databases have stats during initialization, one is chosen at random as
+// the target. If the target database changes between server restarts, the
+// stored stats are useless but do not impair regular operations, because
+// storage is only ever a best-effort content-addressed persistence layer;
+// buckets are regenerated if they are missing. If the database acting as
+// the storage target is deleted, we swap the cache to write to a new
+// storage target that still exists.
+//
+// The main data structures:
+// - Table statistics map, which returns the list of table index statistics
+// for a specific branch, database, and table name.
+// - Object caches:
+// - Bucket cache: chunk-addressed hash map. All provider histogram
+// references point to objects in the bucket cache. Backed by a
+// best-effort on-disk prolly.Map to make restarts faster.
+// - Template cache: table-schema/index-addressed stats.Statistic object
+// for a specific index.
+// - Bound cache: chunk-addressed first row for an index histogram.
+//
+// Work is broken down into:
+// - A basic update cycle of (1) seed database tables, (2) create or pull
+// buckets from disk, (3) commit statistics accessed by the provider.
+// - GC cycle: Mark and sweep the most recent context's active set into
+// new cache/prolly.Map objects.
+// - Branch sync: Update the tracked set of branch-qualified databases.
+//
+// Regular jobs, GC, and branch-sync are all controlled by top-level tickers
+// that cap the rate at which each is called. GC and branch-sync are
+// prioritized ahead of regular jobs, and are therefore rate-limited to
+// allow the job queue to flush in between calls.
+//
+// DDL operations and branch create/delete run concurrently with the event
+// loop. We require an extra fixed-size queue as an intermediary to the
+// job queue to protect the main thread's ownership of the job channel.
+// DDL acquiring the provider lock is a deadlock risk: we cannot do any
+// provider checks while holding the db lock. Lastly, because update jobs
+// are split up over time, we need special checks when finalizing a set
+// of database stats. A race between deleting a database and finalizing
+// its statistics must end with no statistics, which requires a delete
+// check for the case where finalize wins the race.
+//
+// The stats lifecycle can be controlled with:
+// - dolt_stats_stop: clear queue and disable thread
+// - dolt_stats_restart: clear queue, refresh queue, start thread
+// - dolt_stats_purge: clear queue, refresh queue, clear cache,
+// disable thread
+// - dolt_stats_validate: return report of cache misses for current
+// root value.
+//
+// `dolt_stats_wait` is additionally useful for blocking on a full
+// queue cycle and then validating whether the session head is caught up.
+//
+// `dolt_stats_sync` can be used to grab the most up-to-date branch set
+// for each database. This races with the branch ticker and with concurrent
+// database/branch adds.
+//
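Note: the lifecycle procedures above are easiest to exercise over a regular
SQL connection. A minimal sketch, assuming a dolt sql-server on the default
port; the single-column result shape for dolt_stats_validate is an
illustrative assumption, not a documented contract:

    package main

    import (
        "database/sql"
        "fmt"
        "log"

        _ "github.com/go-sql-driver/mysql"
    )

    func main() {
        // Assumed DSN; any dolt sql-server endpoint works.
        db, err := sql.Open("mysql", "root@tcp(127.0.0.1:3306)/mydb")
        if err != nil {
            log.Fatal(err)
        }
        defer db.Close()

        // Full rebuild: clear the cache, then bring the worker back up.
        if _, err := db.Exec("call dolt_stats_purge()"); err != nil {
            log.Fatal(err)
        }
        if _, err := db.Exec("call dolt_stats_restart()"); err != nil {
            log.Fatal(err)
        }

        // Block on a full queue cycle before checking coverage.
        if _, err := db.Exec("call dolt_stats_wait()"); err != nil {
            log.Fatal(err)
        }

        // Assumed single string column for the validation report.
        var report string
        if err := db.QueryRow("call dolt_stats_validate()").Scan(&report); err != nil {
            log.Fatal(err)
        }
        fmt.Println(report)
    }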
diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go b/go/libraries/doltcore/sqle/statspro/dolt_stats.go
deleted file mode 100644
index 4c5d43250c9..00000000000
--- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go
+++ /dev/null
@@ -1,290 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "fmt"
- "sync"
- "time"
-
- "github.com/dolthub/go-mysql-server/sql"
- "github.com/dolthub/go-mysql-server/sql/stats"
-
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/val"
-)
-
-type DoltStats struct {
- Statistic *stats.Statistic
- mu *sync.Mutex
- // Chunks is a list of addresses for the histogram fanout level
- Chunks []hash.Hash
- // Active maps a chunk/bucket address to its position in
- // the histogram. 1-indexed to differentiate from an empty
- // field on disk
- Active map[hash.Hash]int
- Hist sql.Histogram
- Tb *val.TupleBuilder
-}
-
-func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper {
- return s
-}
-
-var _ sql.Statistic = (*DoltStats)(nil)
-
-func (s *DoltStats) SetChunks(h []hash.Hash) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.Chunks = h
-}
-
-func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithRowCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithNullCount(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic {
- ret := *s
- ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic)
- return &ret
-}
-
-func (s *DoltStats) RowCount() uint64 {
- return s.Statistic.RowCount()
-}
-
-func (s *DoltStats) DistinctCount() uint64 {
- return s.Statistic.DistinctCount()
-}
-
-func (s *DoltStats) NullCount() uint64 {
- return s.Statistic.NullCount()
-
-}
-
-func (s *DoltStats) AvgSize() uint64 {
- return s.Statistic.AvgSize()
-
-}
-
-func (s *DoltStats) CreatedAt() time.Time {
- return s.Statistic.CreatedAt()
-
-}
-
-func (s *DoltStats) Columns() []string {
- return s.Statistic.Columns()
-}
-
-func (s *DoltStats) Types() []sql.Type {
- return s.Statistic.Types()
-}
-
-func (s *DoltStats) Qualifier() sql.StatQualifier {
- return s.Statistic.Qualifier()
-}
-
-func (s *DoltStats) IndexClass() sql.IndexClass {
- return s.Statistic.IndexClass()
-}
-
-func (s *DoltStats) FuncDeps() *sql.FuncDepSet {
- return s.Statistic.FuncDeps()
-}
-
-func (s *DoltStats) ColSet() sql.ColSet {
- return s.Statistic.ColSet()
-}
-
-func (s *DoltStats) LowerBound() sql.Row {
- return s.Statistic.LowerBound()
-}
-
-func NewDoltStats() *DoltStats {
- return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
-}
-
-func (s *DoltStats) ToInterface() (interface{}, error) {
- statVal, err := s.Statistic.ToInterface()
- if err != nil {
- return nil, err
- }
- ret := statVal.(map[string]interface{})
-
- var hist sql.Histogram
- for _, b := range s.Hist {
- hist = append(hist, b)
- }
- histVal, err := hist.ToInterface()
- if err != nil {
- return nil, err
- }
- ret["statistic"].(map[string]interface{})["buckets"] = histVal
- return ret, nil
-}
-
-func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
- s.mu.Lock()
- defer s.mu.Unlock()
- ret := *s
- ret.Hist = nil
- for _, b := range h {
- doltB, ok := b.(DoltBucket)
- if !ok {
- return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString())
- }
- ret.Hist = append(ret.Hist, doltB)
- }
- return &ret, nil
-}
-
-func (s *DoltStats) Histogram() sql.Histogram {
- s.mu.Lock()
- defer s.mu.Unlock()
- return s.Hist
-}
-
-func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) {
- hist, err := DoltHistFromSql(stat.Histogram(), stat.Types())
- if err != nil {
- return nil, err
- }
- ret := &DoltStats{
- mu: &sync.Mutex{},
- Hist: hist,
- Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()),
- Active: make(map[hash.Hash]int),
- }
- ret.Statistic.Fds = stat.FuncDeps()
- ret.Statistic.Colset = stat.ColSet()
- return ret, nil
-}
-
-func (s *DoltStats) UpdateActive() {
- s.mu.Lock()
- defer s.mu.Unlock()
- newActive := make(map[hash.Hash]int)
- for i, hash := range s.Chunks {
- newActive[hash] = i
- }
- s.Active = newActive
-}
-
-type DoltHistogram []DoltBucket
-
-type DoltBucket struct {
- Bucket *stats.Bucket
- Chunk hash.Hash
- Created time.Time
-}
-
-func (d DoltBucket) RowCount() uint64 {
- return d.Bucket.RowCount()
-}
-
-func (d DoltBucket) DistinctCount() uint64 {
- return d.Bucket.DistinctCount()
-}
-
-func (d DoltBucket) NullCount() uint64 {
- return d.Bucket.NullCount()
-}
-
-func (d DoltBucket) BoundCount() uint64 {
- return d.Bucket.BoundCount()
-}
-
-func (d DoltBucket) UpperBound() sql.Row {
- return d.Bucket.UpperBound()
-}
-
-func (d DoltBucket) McvCounts() []uint64 {
- return d.Bucket.McvCounts()
-}
-
-func (d DoltBucket) Mcvs() []sql.Row {
- return d.Bucket.Mcvs()
-}
-
-func DoltBucketChunk(b sql.HistogramBucket) hash.Hash {
- return b.(DoltBucket).Chunk
-}
-
-func DoltBucketCreated(b sql.HistogramBucket) time.Time {
- return b.(DoltBucket).Created
-}
-
-var _ sql.HistogramBucket = (*DoltBucket)(nil)
-
-func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) {
- ret := make(sql.Histogram, len(hist))
- var err error
- for i, b := range hist {
- upperBound := make(sql.Row, len(b.UpperBound()))
- for i, v := range b.UpperBound() {
- upperBound[i], _, err = types[i].Convert(v)
- if err != nil {
- return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
- }
- }
- mcvs := make([]sql.Row, len(b.Mcvs()))
- for i, mcv := range b.Mcvs() {
- for _, v := range mcv {
- conv, _, err := types[i].Convert(v)
- if err != nil {
- return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String())
- }
- mcvs[i] = append(mcvs[i], conv)
- }
- }
- ret[i] = DoltBucket{
- Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket),
- }
- }
- return ret, nil
-}
diff --git a/go/libraries/doltcore/sqle/statspro/initdbhook.go b/go/libraries/doltcore/sqle/statspro/initdbhook.go
index 8e11408ea59..1a31a1055bd 100644
--- a/go/libraries/doltcore/sqle/statspro/initdbhook.go
+++ b/go/libraries/doltcore/sqle/statspro/initdbhook.go
@@ -15,10 +15,6 @@
package statspro
import (
- "context"
- "fmt"
- "strings"
-
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
@@ -26,67 +22,29 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
)
-func NewStatsInitDatabaseHook(
- statsProv *Provider,
- ctxFactory func(ctx context.Context) (*sql.Context, error),
- bThreads *sql.BackgroundThreads,
-) sqle.InitDatabaseHook {
+func NewInitDatabaseHook(sc *StatsCoord) sqle.InitDatabaseHook {
return func(
ctx *sql.Context,
- pro *sqle.DoltDatabaseProvider,
+ _ *sqle.DoltDatabaseProvider,
name string,
denv *env.DoltEnv,
db dsess.SqlDatabase,
) error {
- dbName := strings.ToLower(db.Name())
- if statsDb, ok := statsProv.getStatDb(dbName); !ok {
- statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir)
- if err != nil {
- ctx.GetLogger().Debugf("statistics load error: %s", err.Error())
- return nil
- }
- statsProv.setStatDb(dbName, statsDb)
- } else {
- dSess := dsess.DSessFromSess(ctx.Session)
- for _, br := range statsDb.Branches() {
- branchQDbName := BranchQualifiedDatabase(dbName, br)
- sqlDb, err := dSess.Provider().Database(ctx, branchQDbName)
- if err != nil {
- ctx.GetLogger().Logger.Errorf("branch not found: %s", br)
- continue
- }
- branchQDb, ok := sqlDb.(dsess.SqlDatabase)
- if !ok {
- return fmt.Errorf("branch/database not found: %s", branchQDbName)
- }
-
- if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil {
- return err
- } else if ok {
- if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil {
- return err
- }
- }
- }
- ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName)
+ sqlDb, ok := db.(sqle.Database)
+ if !ok {
+ return nil
}
- ctx.GetLogger().Debugf("statistics refresh: initialize %s", name)
- return statsProv.InitAutoRefresh(ctxFactory, name, bThreads)
+
+ // this call should only fail if there is backpressure in the secondary queue
+ sc.AddFs(sqlDb, denv.FS)
+ return nil
}
}
-func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook {
+func NewDropDatabaseHook(sc *StatsCoord) sqle.DropDatabaseHook {
return func(ctx *sql.Context, name string) {
- statsProv.CancelRefreshThread(name)
- if err := statsProv.DropDbStats(ctx, name, false); err != nil {
+ if err := sc.DropDbStats(ctx, name, false); err != nil {
ctx.GetLogger().Debugf("failed to close stats database: %s", err)
}
-
- if db, ok := statsProv.getStatDb(name); ok {
- if err := db.Close(); err != nil {
- ctx.GetLogger().Debugf("failed to close stats database: %s", err)
- }
- delete(statsProv.statDbs, name)
- }
}
}
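Note: callers that previously relied on Provider.Configure to register these
hooks now wire them up explicitly. A minimal sketch under the same
provider-hook pattern the deleted configure.go used; |pro| (a
*sqle.DoltDatabaseProvider) and a running *statspro.StatsCoord |sc| are
assumed to exist in the caller:

    // Init hooks register newly created databases with the stats coordinator.
    pro.InitDatabaseHooks = append(pro.InitDatabaseHooks,
        statspro.NewInitDatabaseHook(sc))
    // Drop hooks run stats cleanup before the provider's own drop hooks,
    // mirroring the ordering the old Configure method established.
    pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{
        statspro.NewDropDatabaseHook(sc)}, pro.DropDatabaseHooks...)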
diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go
deleted file mode 100644
index 5a423466f91..00000000000
--- a/go/libraries/doltcore/sqle/statspro/interface.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2024 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/libraries/utils/filesys"
- "github.com/dolthub/dolt/go/store/hash"
-)
-
-// Database is a backing store for a collection of DoltStats.
-// Each stats database tracks a user database, with multiple
-// branches potentially each having their own statistics.
-type Database interface {
- // ListStatQuals returns the list of index statistics for a branch.
- ListStatQuals(branch string) []sql.StatQualifier
- // LoadBranchStats starts tracking a specific branch's statistics.
- LoadBranchStats(ctx *sql.Context, branch string) error
- // DeleteBranchStats removes references to in memory index statistics.
- // If |flush| is true delete the data from storage.
- DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error
- // GetStat returns a branch's index statistics.
- GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool)
- //SetStat bulk replaces the statistic, deleting any previous version
- SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error
- //DeleteStats deletes a list of index statistics.
- DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier)
- // ReplaceChunks is an update interface that lets a stats implementation
- // decide how to edit stats for a stats refresh.
- ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error
- // Flush instructs the database to sync any partial state to disk
- Flush(ctx context.Context, branch string) error
- // Close finalizes any file references.
- Close() error
- // SetTableHash updates the most recently tracked table stats table hash
- SetTableHash(branch, tableName string, h hash.Hash)
- // GetTableHash returns the most recently tracked table stats table hash
- GetTableHash(branch, tableName string) hash.Hash
- // SetSchemaHash updates the most recently stored table stat's schema hash
- SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error
- // GetSchemaHash returns the schema hash for the latest stored statistics
- GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error)
- // Branches returns the set of branches with tracked statistics databases
- Branches() []string
- // SchemaChange returns false if any table schema in the session
- // root is incompatible with the latest schema used to create a stored
- // set of statistics.
- SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error)
-}
-
-// StatsFactory instances construct statistic databases.
-type StatsFactory interface {
- // Init gets a reference to the stats database for a dolt database
- // rooted at the given filesystem. It will create the database if
- // it does not exist.
- Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error)
-}
diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go
new file mode 100644
index 00000000000..15d28e2115b
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go
@@ -0,0 +1,366 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jobqueue
+
+import (
+ "context"
+ "errors"
+ "sync"
+ "sync/atomic"
+
+ "github.com/dolthub/dolt/go/libraries/utils/circular"
+)
+
+// A SerialQueue is a job queue which runs one job at a time. Jobs are
+// run in the order they are submitted, with the exception that every
+// interrupt job is run before any normal priority job.
+//
+// A SerialQueue can be paused, in which case it will accept new
+// submissions, but will not run them until it is started again.
+//
+// A SerialQueue can be purged, which deletes any pending jobs from
+// it.
+//
+// A SerialQueue can be stopped, in which case it will not accept new
+// submissions and no pending work will be run. Stopping a queue does
+// not purge it, but it is easy for a caller to stop and purge the
+// queue.
+//
+// A stopped or paused SerialQueue can be started, which will cause it
+// to start running submitted jobs again, including any unpurged jobs
+// which were pending when it was stopped or paused.
+//
+// A SerialQueue runs background threads to coordinate its
+// behavior. These background threads are launched with a `Context`
+// supplied to its |Run| method. If that `Context` ever becomes
+// `Done`, the SerialQueue enters a terminal completed state.
+//
+// In general, jobs running on the queue should not block indefinitely
+// and should be very careful about any synchronization. It is safe
+// for jobs within the queue to call DoAsync, InterruptAsync, Stop,
+// Pause, Purge and Start on the queue itself. It is a deadlock for a
+// job within the queue to perform a DoSync or InterruptSync on the
+// queue itself, although that deadlock may be resolved if the
+// provided |ctx| ends up |Done|.
+type SerialQueue struct {
+ running atomic.Bool
+
+ // If the queue is terminally completed, this will be closed.
+ // Submissions to the queue scheduler select on this channel
+ // to return errors if the scheduler is no longer accepting
+ // work.
+ completed chan struct{}
+
+ runnerCh chan work
+ schedCh chan schedReq
+}
+
+var ErrStoppedQueue = errors.New("stopped queue: cannot submit work to a stopped queue")
+var ErrCompletedQueue = errors.New("completed queue: the queue is no longer running")
+
+// NewSerialQueue creates a new serial queue. All of the methods on the
+// returned SerialQueue block indefinitely until its |Run| method is called.
+func NewSerialQueue() *SerialQueue {
+ return &SerialQueue{
+ completed: make(chan struct{}),
+ runnerCh: make(chan work),
+ schedCh: make(chan schedReq),
+ }
+}
+
+// Run the serial queue's background threads with this |ctx|. If the
+// |ctx| ever becomes |Done|, the queue enters a terminal completed
+// state. It is an error to call this function more than once.
+func (s *SerialQueue) Run(ctx context.Context) {
+ if !s.running.CompareAndSwap(false, true) {
+ panic("Cannot run a SerialQueue more than once.")
+ }
+ defer close(s.completed)
+ var wg sync.WaitGroup
+ wg.Add(2)
+ go func() {
+ defer wg.Done()
+ s.runScheduler(ctx)
+ }()
+ go func() {
+ defer wg.Done()
+ s.runRunner(ctx)
+ }()
+ wg.Wait()
+}
+
+// Start the queue. The queue can be in any state, including already started.
+func (s *SerialQueue) Start() error {
+ return s.makeReq(schedReq{
+ reqType: schedReqType_Start,
+ resp: make(chan schedResp, 1),
+ })
+}
+
+// Pause the queue. The queue can be in any state, including already
+// paused. Note that pausing the queue does not block on any
+// currently running job to complete. A pattern to pause the queue
+// with a guarantee that nothing is currently running is:
+//
+// s.InterruptSync(context.Background(), func() { s.Pause() })
+func (s *SerialQueue) Pause() error {
+ return s.makeReq(schedReq{
+ reqType: schedReqType_Pause,
+ resp: make(chan schedResp, 1),
+ })
+}
+
+// Stop the queue. The queue can be in any state, including already
+// stopped. Note that stopping the queue does not block on any
+// currently running job to complete.
+func (s *SerialQueue) Stop() error {
+ return s.makeReq(schedReq{
+ reqType: schedReqType_Stop,
+ resp: make(chan schedResp, 1),
+ })
+}
+
+// Purge the queue. All pending jobs will be dropped.
+func (s *SerialQueue) Purge() error {
+ return s.makeReq(schedReq{
+ reqType: schedReqType_Purge,
+ resp: make(chan schedResp, 1),
+ })
+}
+
+// Run a high priority job on the SerialQueue, blocking for its completion.
+// If done against a Paused queue, this could block indefinitely. The
+// block for completion is gated on the |ctx|.
+func (s *SerialQueue) InterruptSync(ctx context.Context, f func()) error {
+ w, err := s.submitWork(schedPriority_High, f)
+ if err != nil {
+ return err
+ }
+ select {
+ case <-w.done:
+ return nil
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-s.completed:
+ return ErrCompletedQueue
+ }
+}
+
+// Run a normal priority job on the SerialQueue, blocking for its completion.
+// When done against a paused queue, this can block indefinitely.
+func (s *SerialQueue) DoSync(ctx context.Context, f func()) error {
+ w, err := s.submitWork(schedPriority_Normal, f)
+ if err != nil {
+ return err
+ }
+ select {
+ case <-w.done:
+ return nil
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ case <-s.completed:
+ return ErrCompletedQueue
+ }
+}
+
+// Run a high priority job asynchronously on the queue. Returns once the
+// job is accepted.
+func (s *SerialQueue) InterruptAsync(f func()) error {
+ _, err := s.submitWork(schedPriority_High, f)
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// Run a normal priority job asynchronously on the queue. Returns once the
+// job is accepted.
+func (s *SerialQueue) DoAsync(f func()) error {
+ _, err := s.submitWork(schedPriority_Normal, f)
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// Helper function to submit work. Returns the submitted work on
+// success, and an error otherwise.
+func (s *SerialQueue) submitWork(pri schedPriority, f func()) (work, error) {
+ w := work{
+ f: f,
+ done: make(chan struct{}),
+ }
+ err := s.makeReq(schedReq{
+ reqType: schedReqType_Enqueue,
+ pri: pri,
+ work: w,
+ resp: make(chan schedResp, 1),
+ })
+ if err != nil {
+ return work{}, err
+ }
+ return w, nil
+}
+
+func (s *SerialQueue) makeReq(req schedReq) error {
+ select {
+ case s.schedCh <- req:
+ resp := <-req.resp
+ return resp.err
+ case <-s.completed:
+ return ErrCompletedQueue
+ }
+}
+
+// Read off the input channels and maintain queues of pending work.
+// Deliver that work to the runner channel if it is desired.
+func (s *SerialQueue) runScheduler(ctx context.Context) {
+ state := schedState_Running
+ normalQ := circular.NewBuff[work](16)
+ highQ := circular.NewBuff[work](16)
+ for {
+ var sendWorkCh chan work
+ var sendWork work
+ var sentWorkCallback func()
+
+ if state == schedState_Running {
+ if highQ.Len() > 0 {
+ sendWorkCh = s.runnerCh
+ sendWork = highQ.Front()
+ sentWorkCallback = highQ.Pop
+ } else if normalQ.Len() > 0 {
+ sendWorkCh = s.runnerCh
+ sendWork = normalQ.Front()
+ sentWorkCallback = normalQ.Pop
+ }
+ }
+
+ select {
+ case msg := <-s.schedCh:
+ switch msg.reqType {
+ case schedReqType_Enqueue:
+ if state == schedState_Stopped {
+ msg.resp <- schedResp{
+ err: ErrStoppedQueue,
+ }
+ } else {
+ if msg.pri == schedPriority_High {
+ highQ.Push(msg.work)
+ } else {
+ normalQ.Push(msg.work)
+ }
+ msg.resp <- schedResp{
+ err: nil,
+ }
+ }
+ case schedReqType_Purge:
+ highQ = circular.NewBuff[work](highQ.Cap())
+ normalQ = circular.NewBuff[work](normalQ.Cap())
+ msg.resp <- schedResp{
+ err: nil,
+ }
+ case schedReqType_Start:
+ state = schedState_Running
+ msg.resp <- schedResp{
+ err: nil,
+ }
+ case schedReqType_Pause:
+ state = schedState_Paused
+ msg.resp <- schedResp{
+ err: nil,
+ }
+ case schedReqType_Stop:
+ state = schedState_Stopped
+ msg.resp <- schedResp{
+ err: nil,
+ }
+ }
+ case sendWorkCh <- sendWork:
+ // Pop from queue the work came from.
+ sentWorkCallback()
+ case <-ctx.Done():
+ return
+ }
+ }
+}
+
+// Read off the runner channel and run the submitted work.
+func (s *SerialQueue) runRunner(ctx context.Context) {
+ for {
+ select {
+ case w := <-s.runnerCh:
+ w.f()
+ close(w.done)
+ case <-ctx.Done():
+ return
+ }
+ }
+}
+
+// |work| represents work to be run on the runner goroutine.
+type work struct {
+ // The function to call.
+ f func()
+ // The channel to close after the work is run.
+ done chan struct{}
+}
+
+type schedState int
+
+const (
+ // When the scheduler is running, it is willing to accept new work
+ // and to give work to the work thread.
+ schedState_Running schedState = iota
+ // When the scheduler is paused, it is willing to accept new work
+ // but does not give work to the work thread.
+ schedState_Paused
+ // When the scheduler is stopped, it does not accept new work
+ // and does not give work to the work thread.
+ schedState_Stopped
+)
+
+type schedReqType int
+
+const (
+ schedReqType_Enqueue schedReqType = iota
+ schedReqType_Purge
+ schedReqType_Start
+ schedReqType_Pause
+ schedReqType_Stop
+)
+
+type schedPriority int
+
+const (
+ schedPriority_Normal schedPriority = iota
+ schedPriority_High
+)
+
+// Incoming message for the scheduler thread.
+type schedReq struct {
+ reqType schedReqType
+ // Always set, the scheduler's response is
+ // sent through this channel. The send
+ // must never block.
+ resp chan schedResp
+ // Set when |reqType| is Enqueue
+ pri schedPriority
+ // Set when |reqType| is Enqueue
+ work work
+}
+
+type schedResp struct {
+ err error
+}
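Note: a minimal usage sketch of the semantics documented above (pause buffers
submissions, interrupts run ahead of normal jobs, and DoSync doubles as a
flush barrier). The import path matches this diff; everything else is
illustrative:

    package main

    import (
        "context"
        "fmt"

        "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue"
    )

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()

        q := jobqueue.NewSerialQueue()
        go q.Run(ctx) // every queue method blocks until Run is live

        // Pause so both jobs sit in the queue, then observe priority.
        _ = q.Pause()
        _ = q.DoAsync(func() { fmt.Println("normal job runs second") })
        _ = q.InterruptAsync(func() { fmt.Println("interrupt job runs first") })
        _ = q.Start()

        // DoSync acts as a barrier: it returns only after everything
        // queued ahead of it has run.
        _ = q.DoSync(context.Background(), func() { fmt.Println("done") })
    }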
diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go
new file mode 100644
index 00000000000..dd603cc7903
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go
@@ -0,0 +1,279 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jobqueue
+
+import (
+ "context"
+ "sync"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestSerialQueue(t *testing.T) {
+ t.Run("CanceledRunContext", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel()
+ queue := NewSerialQueue()
+ // This should return.
+ queue.Run(ctx)
+ // Now all methods should return ErrCompletedQueue.
+ assert.ErrorIs(t, queue.Start(), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.Pause(), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.Stop(), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.DoSync(context.Background(), func() {}), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.DoAsync(func() {}), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.InterruptSync(context.Background(), func() {}), ErrCompletedQueue)
+ assert.ErrorIs(t, queue.InterruptAsync(func() {}), ErrCompletedQueue)
+ })
+ t.Run("StartsRunning", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ var ran bool
+ err := queue.DoSync(context.Background(), func() {
+ ran = true
+ })
+ assert.NoError(t, err)
+ assert.True(t, ran, "the sync task ran.")
+ cancel()
+ wg.Wait()
+ })
+ t.Run("StoppedQueueReturnsError", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ assert.NoError(t, queue.Stop())
+ err := queue.DoSync(context.Background(), func() {})
+ assert.ErrorIs(t, err, ErrStoppedQueue)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("PausedQueueDoesNotRun", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ assert.NoError(t, queue.Pause())
+ var ran bool
+ for i := 0; i < 16; i++ {
+ err := queue.DoAsync(func() {
+ ran = true
+ })
+ assert.NoError(t, err)
+ }
+ cancel()
+ wg.Wait()
+ assert.False(t, ran, "work did not run on the paused queue.")
+ })
+ t.Run("StartingPausedQueueRunsIt", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ assert.NoError(t, queue.Pause())
+ var ran bool
+ for i := 0; i < 16; i++ {
+ err := queue.DoAsync(func() {
+ ran = true
+ })
+ assert.NoError(t, err)
+ }
+ assert.NoError(t, queue.Start())
+ err := queue.DoSync(context.Background(), func() {})
+ assert.NoError(t, err)
+ assert.True(t, ran, "work ran after the paused queue was started.")
+ cancel()
+ wg.Wait()
+ })
+ t.Run("InterruptWorkRunsFirst", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ assert.NoError(t, queue.Pause())
+ var cnt int
+ queue.DoAsync(func() {
+ assert.Equal(t, cnt, 2)
+ cnt += 1
+ })
+ queue.DoAsync(func() {
+ assert.Equal(t, cnt, 3)
+ cnt += 1
+ })
+ queue.InterruptAsync(func() {
+ assert.Equal(t, cnt, 0)
+ cnt += 1
+ })
+ queue.InterruptAsync(func() {
+ assert.Equal(t, cnt, 1)
+ cnt += 1
+ })
+ assert.NoError(t, queue.Start())
+ assert.NoError(t, queue.DoSync(context.Background(), func() {}))
+ assert.Equal(t, cnt, 4)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("StopFromQueue", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ var cnt int
+ for i := 0; i < 16; i++ {
+ // Some of these calls may error, since the queue
+ // will be stopped asynchronously.
+ queue.DoAsync(func() {
+ cnt += 1
+ assert.NoError(t, queue.Stop())
+ })
+ }
+ assert.Equal(t, cnt, 1)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("PauseFromQueue", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ var cnt int
+ for i := 0; i < 16; i++ {
+ err := queue.DoAsync(func() {
+ cnt += 1
+ assert.NoError(t, queue.Pause())
+ })
+ assert.NoError(t, err)
+ }
+ assert.Equal(t, cnt, 1)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("PurgeFromQueue", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ assert.NoError(t, queue.Pause())
+ var cnt int
+ didRun := make(chan struct{})
+ for i := 0; i < 16; i++ {
+ err := queue.DoAsync(func() {
+ cnt += 1
+ assert.NoError(t, queue.Purge())
+ close(didRun)
+ })
+ assert.NoError(t, err)
+ }
+ assert.NoError(t, queue.Start())
+ <-didRun
+ assert.NoError(t, queue.DoSync(context.Background(), func() {}))
+ assert.Equal(t, cnt, 1)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("DoSyncInQueueDeadlockWithContext", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ var cnt int
+ err := queue.DoSync(context.Background(), func() {
+ cnt += 1
+ ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+ defer cancel()
+ err := queue.DoSync(ctx, func() {
+ cnt += 1
+ })
+ assert.ErrorIs(t, err, context.DeadlineExceeded)
+ })
+ assert.NoError(t, err)
+ assert.NoError(t, queue.DoSync(context.Background(), func() {}))
+ // Both tasks eventually ran...
+ assert.Equal(t, cnt, 2)
+ cancel()
+ wg.Wait()
+ })
+ t.Run("SyncReturnsErrCompletedQueueAfterWorkAccepted", func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ queue := NewSerialQueue()
+ var wg sync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ queue.Run(ctx)
+ }()
+ queue.Pause()
+ var err error
+ var ran bool
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ err = queue.InterruptSync(context.Background(), func() {
+ ran = true
+ })
+ }()
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ time.Sleep(100 * time.Millisecond)
+ queue.Stop()
+ }()
+ cancel()
+ wg.Wait()
+ assert.ErrorIs(t, err, ErrCompletedQueue)
+ assert.False(t, ran, "the interrupt task never ran.")
+ })
+}
diff --git a/go/libraries/doltcore/sqle/statspro/noop_provider.go b/go/libraries/doltcore/sqle/statspro/noop_provider.go
new file mode 100644
index 00000000000..204f1238e0e
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/noop_provider.go
@@ -0,0 +1,82 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "github.com/dolthub/go-mysql-server/sql"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/env"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+)
+
+type StatsNoop struct{}
+
+func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
+ return nil, nil
+}
+
+func (s StatsNoop) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error {
+ return nil
+}
+
+func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error {
+ return nil
+}
+
+func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
+ return nil, false
+}
+
+func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
+ return nil
+}
+
+func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error {
+ return nil
+}
+
+func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
+ return 0, nil
+}
+
+func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
+ return 0, nil
+}
+
+func (s StatsNoop) CancelRefreshThread(string) {
+ return
+}
+
+func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error {
+ return nil
+}
+
+func (s StatsNoop) ThreadStatus(string) string {
+ return "stats disabled"
+}
+
+func (s StatsNoop) Prune(ctx *sql.Context) error {
+ return nil
+}
+
+func (s StatsNoop) Purge(ctx *sql.Context) error {
+ return nil
+}
+
+func (s StatsNoop) WaitForDbSync(ctx *sql.Context) error {
+ return nil
+}
+
+var _ sql.StatsProvider = StatsNoop{}
diff --git a/go/libraries/doltcore/sqle/statspro/provider.go b/go/libraries/doltcore/sqle/statspro/provider.go
new file mode 100644
index 00000000000..88ab86b3f45
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/provider.go
@@ -0,0 +1,405 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"fmt"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+
+	"github.com/dolthub/dolt/go/cmd/dolt/doltversion"
+	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
+	"github.com/dolthub/dolt/go/libraries/utils/earl"
+	"github.com/dolthub/dolt/go/store/types"
+)
+
+var _ sql.StatsProvider = (*StatsCoord)(nil)
+
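+// GetTableStats returns the cached statistics for every index of |table|
+// on the session's current branch.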
+func (sc *StatsCoord) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ branch, err := dSess.GetBranch()
+ if err != nil {
+ return nil, err
+ }
+ key := tableIndexesKey{
+ db: db,
+ branch: branch,
+ table: table.Name(),
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ st := sc.Stats[key]
+ var ret []sql.Statistic
+ for _, s := range st {
+ ret = append(ret, s)
+ }
+ return ret, nil
+}
+
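+// RefreshTableStats synchronously rebuilds statistics for |table|. |dbName|
+// may carry a branch qualifier ("db/branch"); otherwise the session branch
+// (defaulting to "main") is used.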
+func (sc *StatsCoord) RefreshTableStats(ctx *sql.Context, table sql.Table, dbName string) error {
+ dSess := dsess.DSessFromSess(ctx.Session)
+
+ var branch string
+ if strings.Contains(dbName, "/") {
+ parts := strings.Split(dbName, "/")
+ if len(parts) == 2 {
+ dbName = parts[0]
+ branch = parts[1]
+ }
+ }
+	if branch == "" {
+		var err error
+		branch, err = dSess.GetBranch()
+		if err != nil {
+			return err
+		}
+
+		if branch == "" {
+			branch = "main"
+		}
+	}
+
+	db, err := sc.pro.Database(ctx, dbName)
+	if err != nil {
+		return err
+	}
+	sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), branch, branch+"/"+dbName)
+	if err != nil {
+		return err
+	}
+
+ tableKey, newTableStats, err := sc.updateTable(ctx, table.Name(), sqlDb)
+ if err != nil {
+ return err
+ }
+
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ sc.Stats[tableKey] = newTableStats
+ return nil
+}
+
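+// SetStats replaces the cached statistics for the qualified table with the
+// single statistic |s|.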
+func (sc *StatsCoord) SetStats(ctx *sql.Context, s sql.Statistic) error {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ ss, ok := s.(*stats.Statistic)
+ if !ok {
+		return fmt.Errorf("expected *stats.Statistic, found %T", s)
+ }
+ key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table())
+ if err != nil {
+ return err
+ }
+ sc.Stats[key] = sc.Stats[key][:0]
+ sc.Stats[key] = append(sc.Stats[key], ss)
+ return nil
+}
+
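+// GetStats returns the cached statistic matching |qual|'s index on the
+// session's current branch, if one exists.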
+func (sc *StatsCoord) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ key, err := sc.statsKey(ctx, qual.Database, qual.Table())
+ if err != nil {
+ return nil, false
+ }
+ for _, s := range sc.Stats[key] {
+ if strings.EqualFold(s.Qualifier().Index(), qual.Index()) {
+ return s, true
+ }
+ }
+ return nil, false
+}
+
+func (sc *StatsCoord) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ key := tableIndexesKey{
+ db: db,
+ branch: branch,
+ table: table,
+ schema: schema,
+ }
+ return sc.Stats[key], nil
+}
+
+func (sc *StatsCoord) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error {
+ key, err := sc.statsKey(ctx, qual.Database, qual.Table())
+ if err != nil {
+ return err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ delete(sc.Stats, key)
+ return nil
+}
+
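+// DropDbStats removes all cached statistics for |dbName|, rotating the
+// backing storage first if |dbName| currently hosts it.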
+func (sc *StatsCoord) DropDbStats(ctx *sql.Context, dbName string, flush bool) error {
+ return sc.sq.InterruptSync(ctx, func() {
+ if strings.EqualFold(sc.statsBackingDb, dbName) {
+ delete(sc.dbFs, dbName)
+ if err := sc.rotateStorage(ctx); err != nil {
+ sc.descError("drop rotateStorage", err)
+ }
+ }
+
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ var deleteKeys []tableIndexesKey
+		for k := range sc.Stats {
+ if strings.EqualFold(dbName, k.db) {
+ deleteKeys = append(deleteKeys, k)
+ }
+ }
+ for _, k := range deleteKeys {
+ delete(sc.Stats, k)
+ }
+ })
+}
+
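+// statsKey builds the Stats cache key for |table| in |dbName| on the
+// session's current branch.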
+func (sc *StatsCoord) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) {
+ dSess := dsess.DSessFromSess(ctx.Session)
+ branch, err := dSess.GetBranch()
+ if err != nil {
+ return tableIndexesKey{}, err
+ }
+ key := tableIndexesKey{
+ db: dbName,
+ branch: branch,
+ table: table,
+ }
+ return key, nil
+}
+
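+// RowCount returns the cached row count of |table|'s primary index on the
+// session's current branch.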
+func (sc *StatsCoord) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
+ key, err := sc.statsKey(ctx, dbName, table.Name())
+ if err != nil {
+ return 0, err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ for _, s := range sc.Stats[key] {
+ if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
+ return s.RowCnt, nil
+ }
+ }
+ return 0, nil
+}
+
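+// DataLength mirrors RowCount: it returns the primary index row count
+// rather than a byte-size estimate.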
+func (sc *StatsCoord) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) {
+ key, err := sc.statsKey(ctx, dbName, table.Name())
+ if err != nil {
+ return 0, err
+ }
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+ for _, s := range sc.Stats[key] {
+ if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") {
+ return s.RowCnt, nil
+ }
+ }
+ return 0, nil
+}
+
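+// Init registers a filesystem for each non-replica database and, unless
+// |keepStorage| is set, rotates stats storage when the first database is
+// registered.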
+func (sc *StatsCoord) Init(ctx context.Context, dbs []dsess.SqlDatabase, keepStorage bool) error {
+ sqlCtx, err := sc.ctxGen(ctx)
+ if err != nil {
+ return err
+ }
+ for i, db := range dbs {
+ if db, ok := db.(sqle.Database); ok { // exclude read replica dbs
+ fs, err := sc.pro.FileSystemForDatabase(db.AliasedName())
+ if err != nil {
+ return err
+ }
+ sc.AddFs(db, fs)
+ if i == 0 && !keepStorage {
+ if err := sc.rotateStorage(sqlCtx); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
+
+func (sc *StatsCoord) Purge(ctx *sql.Context) error {
+ if err := sc.rotateStorage(ctx); err != nil {
+ return err
+ }
+ if err := sc.kv.StartGc(ctx, 0); err != nil {
+ return err
+ }
+ return sc.kv.FinishGc(nil)
+}
+
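+// rotateStorage drops the current backing database's stats storage and
+// re-initializes it in an arbitrary remaining database, carrying over the
+// in-memory cache. If no databases remain, stats fall back to memory only.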
+func (sc *StatsCoord) rotateStorage(ctx *sql.Context) error {
+ if sc.statsBackingDb != "" {
+ if err := sc.rm(sc.statsBackingDb); err != nil {
+ return err
+ }
+ }
+
+ var mem *memStats
+ switch kv := sc.kv.(type) {
+ case *prollyStats:
+ mem = kv.mem
+ case *memStats:
+ mem = kv
+ default:
+ mem = NewMemStats()
+ }
+
+ if len(sc.dbFs) == 0 {
+ sc.kv = mem
+ sc.statsBackingDb = ""
+ return nil
+ }
+
+ var newStorageTarget string
+	for db := range sc.dbFs {
+ newStorageTarget = db
+ break
+ }
+
+ if err := sc.rm(newStorageTarget); err != nil {
+ return err
+ }
+
+ newKv, err := sc.initStorage(ctx, newStorageTarget)
+ if err != nil {
+ return err
+ }
+
+ newKv.mem = mem
+ sc.kv = newKv
+ sc.statsBackingDb = newStorageTarget
+ return nil
+}
+
+func (sc *StatsCoord) rm(db string) error {
+ fs, ok := sc.dbFs[db]
+ if !ok {
+ return fmt.Errorf("failed to remove stats db: %s filesys not found", db)
+ }
+
+ statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
+ if err != nil {
+ return err
+ }
+
+ if ok, _ := statsFs.Exists(""); ok {
+ if err := statsFs.Delete("", true); err != nil {
+ return err
+ }
+ }
+
+ dropDbLoc, err := statsFs.Abs("")
+ if err != nil {
+ return err
+ }
+
+ if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
+ return err
+ }
+ return nil
+}
+
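+// initStorage creates or loads the dolt stats directory under
+// |storageTarget|'s filesystem and returns a prolly-backed stats KV for it.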
+func (sc *StatsCoord) initStorage(ctx *sql.Context, storageTarget string) (*prollyStats, error) {
+ fs, ok := sc.dbFs[strings.ToLower(storageTarget)]
+ if !ok {
+		return nil, fmt.Errorf("failed to init stats db: %s filesys not found", storageTarget)
+ }
+
+ params := make(map[string]interface{})
+ params[dbfactory.GRPCDialProviderParam] = sc.dialPro
+
+	var urlPath string
+	u, err := earl.Parse(sc.pro.DbFactoryUrl())
+	if err != nil {
+		return nil, err
+	}
+	if u.Scheme == dbfactory.MemScheme {
+		urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir)
+	} else if u.Scheme == dbfactory.FileScheme {
+		urlPath = doltdb.LocalDirDoltDB
+	}
+
+ statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
+ if err != nil {
+ return nil, err
+ }
+
+ var dEnv *env.DoltEnv
+ exists, isDir := statsFs.Exists("")
+ if !exists {
+ err := statsFs.MkDirs("")
+ if err != nil {
+ return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error())
+ }
+
+		dEnv = env.Load(ctx, sc.hdp, statsFs, urlPath, doltversion.Version)
+ sess := dsess.DSessFromSess(ctx.Session)
+ err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), storageTarget)
+ if err != nil {
+ return nil, err
+ }
+ } else if !isDir {
+ return nil, fmt.Errorf("file exists where the dolt stats directory should be")
+ } else {
+ dEnv = env.LoadWithoutDB(ctx, sc.hdp, statsFs, "", doltversion.Version)
+ }
+
+ if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil {
+ return nil, err
+ }
+
+ deaf := dEnv.DbEaFactory(ctx)
+
+ tmpDir, err := dEnv.TempTableFilesDir()
+ if err != nil {
+ return nil, err
+ }
+ opts := editor.Options{
+ Deaf: deaf,
+ Tempdir: tmpDir,
+ }
+ statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts)
+ if err != nil {
+ return nil, err
+ }
+ return NewProllyStats(ctx, statsDb)
+}
+
+func (sc *StatsCoord) WaitForDbSync(ctx *sql.Context) error {
+ // wait for the current partial + one full cycle to complete
+	for range 2 {
+ done := sc.getCycleWaiter()
+ select {
+ case <-done:
+ case <-ctx.Done():
+ return context.Cause(ctx)
+ }
+ }
+ return nil
+}
+
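+// Gc flags a GC for the next cycle and blocks until that cycle completes.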
+func (sc *StatsCoord) Gc(ctx *sql.Context) error {
+ sc.sq.InterruptAsync(func() {
+ sc.doGc = true
+ })
+ return sc.WaitForDbSync(ctx)
+}
diff --git a/go/libraries/doltcore/sqle/statspro/scheduler.go b/go/libraries/doltcore/sqle/statspro/scheduler.go
new file mode 100644
index 00000000000..83a0677ebf2
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/scheduler.go
@@ -0,0 +1,220 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+	"github.com/sirupsen/logrus"
+
+	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
+	"github.com/dolthub/dolt/go/libraries/doltcore/env"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue"
+	"github.com/dolthub/dolt/go/libraries/utils/filesys"
+)
+
+type ctxFactory func(ctx context.Context) (*sql.Context, error)
+
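+// NewStatsCoord returns a StatsCoord backed by an in-memory stats KV with
+// default timer intervals, and starts its serial job queue on a background
+// goroutine.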
+func NewStatsCoord(ctx context.Context, pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, logger *logrus.Logger, threads *sql.BackgroundThreads, dEnv *env.DoltEnv) *StatsCoord {
+ done := make(chan struct{})
+ close(done)
+ kv := NewMemStats()
+ sq := jobqueue.NewSerialQueue()
+ go func() {
+ sq.Run(ctx)
+ }()
+ return &StatsCoord{
+ statsMu: &sync.Mutex{},
+ logger: logger,
+ JobInterval: 500 * time.Millisecond,
+ gcInterval: 24 * time.Hour,
+ branchInterval: 24 * time.Hour,
+ sq: sq,
+ Stats: make(map[tableIndexesKey][]*stats.Statistic),
+ dbFs: make(map[string]filesys.Filesys),
+ threads: threads,
+ senderDone: done,
+ cycleMu: &sync.Mutex{},
+ kv: kv,
+ pro: pro,
+ hdp: dEnv.GetUserHomeDir,
+ dialPro: env.NewGRPCDialProviderFromDoltEnv(dEnv),
+ ctxGen: ctxGen,
+ }
+}
+
+func (sc *StatsCoord) SetMemOnly(v bool) {
+ sc.memOnly = v
+}
+
+func (sc *StatsCoord) SetEnableGc(v bool) {
+ sc.enableGc = v
+}
+
+func (sc *StatsCoord) SetTimers(job, gc, branch int64) {
+ sc.JobInterval = time.Duration(job)
+ sc.gcInterval = time.Duration(gc)
+ sc.branchInterval = time.Duration(branch)
+}
+
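+// tableIndexesKey identifies the set of index statistics for one table on
+// one branch of one database.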
+type tableIndexesKey struct {
+ db string
+ branch string
+ table string
+ schema string
+}
+
+func (k tableIndexesKey) String() string {
+ return k.db + "/" + k.branch + "/" + k.table
+}
+
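+// StatsCoord coordinates background statistics collection. All mutations of
+// coordinator state run through a serial job queue; sessions read the Stats
+// map under statsMu.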
+type StatsCoord struct {
+ logger *logrus.Logger
+ threads *sql.BackgroundThreads
+ pro *sqle.DoltDatabaseProvider
+ statsBackingDb string
+ dialPro dbfactory.GRPCDialProvider
+ hdp env.HomeDirProvider
+ dbFs map[string]filesys.Filesys
+
+ // ctxGen lets us fetch the most recent working root
+ ctxGen ctxFactory
+
+ cycleMu *sync.Mutex
+ cycleCtx context.Context
+ cycleCancel context.CancelFunc
+ sq *jobqueue.SerialQueue
+
+ senderDone chan struct{}
+
+ JobInterval time.Duration
+ gcInterval time.Duration
+ branchInterval time.Duration
+ memOnly bool
+ enableGc bool
+ doGc bool
+ Debug bool
+
+ // kv is a content-addressed cache of histogram objects:
+ // buckets, first bounds, and schema-specific statistic
+ // templates.
+ kv StatsKv
+
+ // Stats tracks table statistics accessible to sessions.
+ Stats map[tableIndexesKey][]*stats.Statistic
+ statsMu *sync.Mutex
+
+ dbCnt int
+ gcCnt int
+}
+
+// Stop stops the sender thread and then pauses the queue
+func (sc *StatsCoord) Stop(ctx context.Context) error {
+	if err := sc.sq.InterruptSync(ctx, func() {
+		sc.cancelSender()
+		select {
+		case <-ctx.Done():
+		case <-sc.senderDone:
+		}
+	}); err != nil {
+		return err
+	}
+	return sc.sq.Pause()
+}
+
+// Restart continues the queue and blocks until sender is running
+func (sc *StatsCoord) Restart(ctx context.Context) error {
+ sc.sq.Start()
+ return sc.sq.InterruptSync(ctx, func() {
+ sc.cancelSender()
+ select {
+ case <-ctx.Done():
+ return
+ case <-sc.senderDone:
+ }
+ go func() {
+ sc.runSender(ctx)
+ }()
+ })
+}
+
+func (sc *StatsCoord) Close() {
+	sc.sq.Stop()
+	sc.cancelSender()
+}
+
+func (sc *StatsCoord) AddFs(db dsess.SqlDatabase, fs filesys.Filesys) {
+	sc.dbFs[db.AliasedName()] = fs
+}
+
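+// Info reports coordinator state for introspection: cached bucket, bound,
+// and template counts, flushed storage buckets, and whether the sender
+// thread is active.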
+func (sc *StatsCoord) Info(ctx context.Context) (dprocedures.StatsInfo, error) {
+ sc.statsMu.Lock()
+ defer sc.statsMu.Unlock()
+
+ cachedBucketCnt := sc.kv.Len()
+ var cachedBoundCnt int
+ var cachedTemplateCnt int
+ switch kv := sc.kv.(type) {
+ case *memStats:
+ cachedBoundCnt = len(kv.bounds)
+ cachedTemplateCnt = len(kv.templates)
+ case *prollyStats:
+ cachedBoundCnt = len(kv.mem.bounds)
+ cachedTemplateCnt = len(kv.mem.templates)
+ }
+
+ statCnt := len(sc.Stats)
+
+ storageCnt, err := sc.kv.Flush(ctx)
+ if err != nil {
+ return dprocedures.StatsInfo{}, err
+ }
+ var active bool
+ select {
+ case <-sc.senderDone:
+ default:
+ active = true
+ }
+
+ return dprocedures.StatsInfo{
+ DbCnt: sc.dbCnt,
+ Active: active,
+ CachedBucketCnt: cachedBucketCnt,
+ StorageBucketCnt: storageCnt,
+ CachedBoundCnt: cachedBoundCnt,
+ CachedTemplateCnt: cachedTemplateCnt,
+ StatCnt: statCnt,
+ GcCounter: sc.gcCnt,
+ }, nil
+}
+
+func (sc *StatsCoord) descError(d string, err error) {
+ if sc.Debug {
+ log.Println("stats error: ", err.Error())
+ }
+ sc.logger.Errorf("stats error; job detail: %s; verbose: %s", d, err)
+}
diff --git a/go/libraries/doltcore/sqle/statspro/scheduler_test.go b/go/libraries/doltcore/sqle/statspro/scheduler_test.go
new file mode 100644
index 00000000000..f9d0848202e
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/scheduler_test.go
@@ -0,0 +1,1124 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+ "time"
+
+ gms "github.com/dolthub/go-mysql-server"
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/analyzer"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+ "github.com/sirupsen/logrus"
+ "github.com/stretchr/testify/require"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/branch_control"
+ "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
+ "github.com/dolthub/dolt/go/libraries/doltcore/env"
+ "github.com/dolthub/dolt/go/libraries/doltcore/ref"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer"
+)
+
+func TestScheduleLoop(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+
+ {
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, len(kv.buckets))
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+ }
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy"))
+
+ //doGcCycle(t, ctx, sc)
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 14, len(kv.buckets))
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 2, len(stat))
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+}
+
+func TestAnalyze(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (-1,-1)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "analyze table xy"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+	require.Equal(t, 0, sc.gcCnt)
+ require.Equal(t, 6, len(kv.buckets))
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ for _, tableStats := range sc.Stats {
+ require.Equal(t, 2, len(tableStats))
+ }
+}
+
+func TestModifyColumn(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy modify column y bigint"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 10, len(kv.buckets))
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 4, len(stat[0].Hist))
+ require.Equal(t, 2, len(stat[1].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+ require.Equal(t, 6, len(kv.buckets))
+ }
+}
+
+func TestAddColumn(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy add column z int"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, len(kv.buckets))
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates)) // +2 for new schema
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 2, len(stat[0].Hist))
+ require.Equal(t, 2, len(stat[1].Hist))
+ }
+}
+
+func TestDropIndex(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, len(kv.buckets))
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 2, len(stat[0].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 2, len(kv.buckets))
+ require.Equal(t, 1, len(kv.bounds))
+ require.Equal(t, 1, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 2, len(stat[0].Hist))
+ }
+}
+
+func TestDropTable(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, len(kv.buckets))
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 1, len(stat[0].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 1, len(kv.buckets))
+ require.Equal(t, 1, len(kv.bounds))
+ require.Equal(t, 1, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat = sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 1, len(stat))
+ require.Equal(t, 1, len(stat[0].Hist))
+ }
+}
+
+func TestDeleteAboveBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 498"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, len(kv.buckets)) // 1 for new chunk
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates)) // +1 for schema change
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 2, len(stat[0].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ require.Equal(t, 2, len(kv.buckets))
+ }
+}
+
+func TestDeleteBelowBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 410"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+
+ require.Equal(t, 5, len(kv.buckets)) // +1 rewrite partial chunk
+ require.Equal(t, 3, len(kv.bounds)) // +1 rewrite first chunk
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 1, len(stat[0].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ require.Equal(t, 1, len(kv.buckets))
+ }
+}
+
+func TestDeleteOnBoundary(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y"))
+
+ {
+ // PRIMARY boundary chunk -> rewrite y_idx's second
+ require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 414"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4, len(kv.buckets))
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates)) // +1 schema change
+ require.Equal(t, 1, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}]
+ require.Equal(t, 1, len(stat[0].Hist))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ require.Equal(t, 1, len(kv.buckets))
+ }
+}
+
+func TestAddDropDatabases(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // xy and t
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, len(kv.buckets))
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}]
+ require.Equal(t, 1, len(stat))
+ }
+
+ dropHook := NewDropDatabaseHook(sc)
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+ dropHook(ctx, "otherdb")
+
+ _, ok := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}]
+ require.False(t, ok)
+ }
+}
+
+func TestGC(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)"))
+
+ dropHook := NewDropDatabaseHook(sc)
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+ dropHook(ctx, "otherdb")
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // test for cleanup
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 5, len(kv.buckets))
+ require.Equal(t, 3, len(kv.bounds))
+ require.Equal(t, 3, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ }
+}
+
+func TestBranches(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = true
+
+ {
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add xy')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add t')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add s')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat2')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (2), (3)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'insert into t')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat3')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop table t"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop t')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop index j')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ stat, ok := sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}]
+ require.Equal(t, 1, len(stat))
+ stat = sc.Stats[tableIndexesKey{"thirddb", "main", "s", ""}]
+ require.Equal(t, 2, len(stat))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}]
+ require.True(t, ok)
+ require.Equal(t, 2, len(stat))
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.True(t, ok)
+ require.Equal(t, 1, len(stat))
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}]
+ require.True(t, ok)
+ require.Equal(t, 1, len(stat))
+
+ // mydb: 4 shared
+ // otherdb: 1 + 1
+ // thirddb: 2 + shared
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 4+2+2, len(kv.buckets))
+ require.Equal(t, 2+(1+1)+2, len(kv.bounds))
+ require.Equal(t, 2+1+(2+1), len(kv.templates))
+ require.Equal(t, 7-1, len(sc.Stats))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}]
+ require.False(t, ok)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_branch('-D', 'feat1')"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}]
+ require.False(t, ok)
+ stat, ok = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}]
+ require.True(t, ok)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ // 3 dbs remaining, mydb/main, thirddb/feat1, thirddb/main
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 4+2, len(kv.buckets))
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 5, len(kv.templates))
+ require.Equal(t, 3, len(sc.Stats))
+ }
+}
+
+func TestBucketDoubling(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+
+ cur := sc.kv.(*memStats).buckets
+ newB := make(map[bucketKey]*stats.Bucket)
+ for k, v := range cur {
+ newB[k] = v
+ }
+ sc.kv.(*memStats).buckets = newB
+
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ sc.enableGc = true
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, len(kv.buckets))
+ require.Equal(t, 4, len(kv.bounds))
+ require.Equal(t, 4, len(kv.templates))
+ require.Equal(t, 2, len(sc.Stats))
+ stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}]
+ require.Equal(t, 7, len(stat[0].Hist))
+ require.Equal(t, 7, len(stat[1].Hist))
+}
+
+func TestBucketCounting(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+ sc.enableGc = false
+
+ // add more data
+ b := strings.Repeat("b", 100)
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))"))
+ abIns := strings.Builder{}
+ abIns.WriteString("insert into ab values")
+ for i := range 200 {
+ if i > 0 {
+ abIns.WriteString(", ")
+ }
+ abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, abIns.String()))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // 4 old + 2*7 new ab
+ kv := sc.kv.(*memStats)
+ require.Equal(t, 18, len(kv.buckets))
+ require.Equal(t, 2, len(sc.Stats))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table cd (c int primary key, d varchar(200), key (d,c))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into cd select a,b from ab"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ // no new buckets
+ kv = sc.kv.(*memStats)
+ require.Equal(t, 18, len(kv.buckets))
+ require.Equal(t, 3, len(sc.Stats))
+}
+
+func TestDropOnlyDb(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, false)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ _, ok := sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "mydb", sc.statsBackingDb)
+
+ // what happens when we drop the only database? swap to memory?
+ // add first database, switch to prolly?
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ require.NoError(t, sc.Stop(context.Background()))
+
+ // empty memory KV
+ _, ok = sc.kv.(*memStats)
+ require.True(t, ok)
+ require.Equal(t, "", sc.statsBackingDb)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb"))
+
+ // empty prollyKv
+ _, ok = sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "otherdb", sc.statsBackingDb)
+}
+
+func TestRotateBackingDb(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, false)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database backupdb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "use backupdb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ require.Equal(t, 5, sc.kv.Len())
+ require.Equal(t, 2, len(sc.Stats))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb"))
+
+ _, ok := sc.kv.(*prollyStats)
+ require.True(t, ok)
+ require.Equal(t, "backupdb", sc.statsBackingDb)
+
+ // lost the backing storage, previous in-memory moves into new kv
+ require.Equal(t, 5, sc.kv.Len())
+ require.Equal(t, 1, len(sc.Stats))
+}
+
+func TestReadCounter(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := defaultSetup(t, threads, true)
+
+ {
+ si, err := sc.Info(ctx)
+ require.NoError(t, err)
+ require.Equal(t, 0, si.ReadCnt)
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (501, 0)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ si, err = sc.Info(ctx)
+ require.NoError(t, err)
+ require.Equal(t, 2, si.ReadCnt)
+ }
+}
+
+func TestPanic(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ sc.sq.DoSync(ctx, func() {
+ panic("test panic")
+ })
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+}
+
+func TestPurge(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y varchar(10), key (y,x))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database other"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use other"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(10), key (b,a))"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0), (1,1), (2,2)"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+
+ require.NoError(t, sc.Stop(context.Background()))
+
+ kv := sc.kv.(*prollyStats)
+ require.Equal(t, 2, kv.Len())
+ require.Equal(t, 4, len(kv.mem.templates))
+ require.Equal(t, 2, len(kv.mem.bounds))
+ m, err := kv.m.Map(ctx)
+ require.NoError(t, err)
+ cmpCnt, err := m.Count()
+ require.NoError(t, err)
+ require.Equal(t, 2, cmpCnt)
+
+	require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_purge()"))
+
+ kv = sc.kv.(*prollyStats)
+ require.Equal(t, 0, kv.Len())
+ require.Equal(t, 0, len(kv.mem.templates))
+ require.Equal(t, 0, len(kv.mem.bounds))
+ m, err = kv.m.Map(ctx)
+ require.NoError(t, err)
+ cmpCnt, err = m.Count()
+ require.NoError(t, err)
+ require.Equal(t, 0, cmpCnt)
+}
+
+func emptySetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord) {
+ dEnv := dtestutils.CreateTestEnv()
+ sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+
+ sql.SystemVariables.AssignValues(map[string]interface{}{
+ dsess.DoltStatsGCInterval: 100,
+ dsess.DoltStatsBranchInterval: 100,
+ dsess.DoltStatsJobInterval: 1,
+ })
+
+ sc := sqlEng.Analyzer.Catalog.StatsProvider.(*StatsCoord)
+ sc.SetEnableGc(false)
+ sc.JobInterval = time.Nanosecond
+
+ require.NoError(t, sc.Restart(ctx))
+
+ ctx, _ = sc.ctxGen(ctx)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ require.NoError(t, sc.Stop(context.Background()))
+
+ var sqlDbs []sqle.Database
+ for _, db := range sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) {
+ if sqlDb, ok := db.(sqle.Database); ok {
+ branch := ref.NewBranchRef("main")
+ db, err := sqle.RevisionDbForBranch(ctx, sqlDb, branch.GetPath(), branch.GetPath()+"/"+sqlDb.AliasedName())
+ require.NoError(t, err)
+ sqlDbs = append(sqlDbs, db.(sqle.Database))
+ }
+ }
+
+ if memOnly {
+ statsKv := NewMemStats()
+ sc.kv = statsKv
+ }
+
+ return ctx, sqlEng, sc
+}
+
+func defaultSetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord) {
+ ctx, sqlEng, sc := emptySetup(t, threads, memOnly)
+ //sc.Debug = true
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int, key (y,x))"))
+
+ xyIns := strings.Builder{}
+ xyIns.WriteString("insert into xy values")
+ for i := range 500 {
+ if i > 0 {
+ xyIns.WriteString(", ")
+ }
+ xyIns.WriteString(fmt.Sprintf("(%d, %d)", i, i%25))
+ }
+ require.NoError(t, executeQuery(ctx, sqlEng, xyIns.String()))
+
+ var kv *memStats
+ switch s := sc.kv.(type) {
+ case *memStats:
+ kv = s
+ case *prollyStats:
+ kv = s.mem
+ }
+ require.Equal(t, 4, len(kv.buckets))
+ require.Equal(t, 2, len(kv.bounds))
+ require.Equal(t, 2, len(kv.templates))
+ require.Equal(t, 1, len(sc.Stats))
+ for _, tableStats := range sc.Stats {
+ require.Equal(t, 2, len(tableStats))
+ }
+
+ return ctx, sqlEng, sc
+}
+
+func executeQuery(ctx *sql.Context, eng *gms.Engine, query string) error {
+ _, iter, _, err := eng.Query(ctx, query)
+ if err != nil {
+ return err
+ }
+ for {
+ _, err = iter.Next(ctx)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return err
+ }
+ }
+ return iter.Close(ctx) // tx commit
+}
+
+func executeQueryResults(ctx *sql.Context, eng *gms.Engine, query string) ([]sql.Row, error) {
+ _, iter, _, err := eng.Query(ctx, query)
+ if err != nil {
+ return nil, err
+ }
+ var ret []sql.Row
+ for {
+ r, err := iter.Next(ctx)
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ ret = append(ret, r)
+ }
+ return ret, iter.Close(ctx) // tx commit
+}
+
+func newTestEngine(ctx context.Context, dEnv *env.DoltEnv, threads *sql.BackgroundThreads) (*gms.Engine, *sql.Context) {
+ pro, err := sqle.NewDoltDatabaseProviderWithDatabases("main", dEnv.FS, nil, nil, threads)
+ if err != nil {
+ panic(err)
+ }
+
+ mrEnv, err := env.MultiEnvForDirectory(ctx, dEnv.Config.WriteableConfig(), dEnv.FS, dEnv.Version, dEnv)
+ if err != nil {
+ panic(err)
+ }
+
+ sc := NewStatsCoord(ctx, pro, nil, logrus.StandardLogger(), threads, dEnv)
+
+ gcSafepointController := dsess.NewGCSafepointController()
+
+ doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController)
+ if err != nil {
+ panic(err)
+ }
+
+ sqlCtx := sql.NewContext(ctx, sql.WithSession(doltSession))
+ sqlCtx.SetCurrentDatabase(mrEnv.GetFirstDatabase())
+
+ sc.ctxGen = func(ctx context.Context) (*sql.Context, error) {
+ doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController)
+ if err != nil {
+ return nil, err
+ }
+ return sql.NewContext(ctx, sql.WithSession(doltSession)), nil
+ }
+
+ pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, NewInitDatabaseHook(sc))
+ pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, NewDropDatabaseHook(sc))
+
+ sqlEng := gms.New(analyzer.NewBuilder(pro).Build(), &gms.Config{
+ IsReadOnly: false,
+ IsServerLocked: false,
+ })
+ sqlEng.Analyzer.Catalog.StatsProvider = sc
+ return sqlEng, sqlCtx
+}
+
+func TestStatsGcConcurrency(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+ sc.JobInterval = 1 * time.Nanosecond
+ sc.gcInterval = 100 * time.Nanosecond
+ sc.branchInterval = 50 * time.Nanosecond
+ require.NoError(t, sc.Restart(ctx))
+
+ addDb := func(ctx *sql.Context, dbName string) {
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database "+dbName))
+ }
+
+ addData := func(ctx *sql.Context, dbName string, i int) {
+ //log.Println("add ", dbName)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use "+dbName))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+ }
+
+	dropDb := func(dropCtx *sql.Context, dbName string) {
+		//log.Println("drop ", dbName)
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "use mydb"))
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "drop database "+dbName))
+	}
+
+ // it is important to use new sessions for this test, to avoid working root conflicts
+ addCtx, _ := sc.ctxGen(context.Background())
+ writeCtx, _ := sc.ctxGen(context.Background())
+ dropCtx, _ := sc.ctxGen(context.Background())
+
+ iters := 200
+ dbs := make(chan string, iters)
+
+ {
+ wg := sync.WaitGroup{}
+ wg.Add(2)
+
+ addCnt := 0
+ go func() {
+ for i := range iters {
+ addCnt++
+ dbName := "db" + strconv.Itoa(i)
+ addDb(addCtx, dbName)
+ addData(writeCtx, dbName, i)
+ dbs <- dbName
+ }
+ close(dbs)
+ wg.Done()
+ }()
+
+ dropCnt := 0
+ go func() {
+ i := 0
+ for db := range dbs {
+ if i%2 == 0 {
+ time.Sleep(50 * time.Millisecond)
+ dropCnt++
+ dropDb(dropCtx, db)
+ }
+ i++
+ }
+ wg.Done()
+ }()
+
+ wg.Wait()
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ require.NoError(t, sc.Stop(context.Background()))
+
+ // 101 dbs, 100 with stats (not main)
+ require.Equal(t, iters/2, len(sc.Stats))
+ //require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters/2, sc.kv.Len())
+ }
+}
+
+func TestStatsBranchConcurrency(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ sc.JobInterval = 10
+ sc.gcInterval = time.Hour
+ sc.branchInterval = time.Hour
+ require.NoError(t, sc.Restart(ctx))
+
+ addBranch := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')"))
+ }
+
+ addData := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+ //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ err := executeQuery(ctx, sqlEng, "call dolt_stats_sync()")
+ for err != nil {
+ log.Println("add waiting on: ", err.Error())
+ err = executeQuery(ctx, sqlEng, "call dolt_stats_sync()")
+ }
+ }
+
+	dropBranch := func(dropCtx *sql.Context, branchName string) {
+		//log.Println("delete branch: ", branchName)
+		require.NoError(t, executeQuery(dropCtx, sqlEng, "use mydb"))
+		del := "call dolt_branch('-d', '" + branchName + "')"
+		require.NoError(t, executeQuery(dropCtx, sqlEng, del))
+	}
+
+ // it is important to use new sessions for this test, to avoid working root conflicts
+ addCtx, _ := sc.ctxGen(context.Background())
+ dropCtx, _ := sc.ctxGen(context.Background())
+
+ iters := 100
+ {
+ branches := make(chan string, iters)
+
+ wg := sync.WaitGroup{}
+ wg.Add(2)
+
+ go func() {
+ for i := range iters {
+ addBranch(addCtx, i)
+ addData(addCtx, i)
+ branches <- "branch" + strconv.Itoa(i)
+ }
+ close(branches)
+ wg.Done()
+ }()
+
+ go func() {
+ i := 0
+ for br := range branches {
+ if i%2 == 0 {
+ dropBranch(dropCtx, br)
+ time.Sleep(50 * time.Millisecond)
+ }
+ i++
+ }
+ wg.Done()
+ }()
+
+ wg.Wait()
+
+ err := executeQuery(ctx, sqlEng, "call dolt_stats_sync()")
+ for err != nil {
+ log.Println("waiting on final branch sync", err)
+ err = executeQuery(ctx, sqlEng, "call dolt_stats_sync()")
+ }
+ err = executeQuery(ctx, sqlEng, "call dolt_stats_gc()")
+ for err != nil {
+ log.Println("waiting on final Gc", err)
+ err = executeQuery(ctx, sqlEng, "call dolt_stats_gc()")
+ }
+ require.NoError(t, sc.Stop(context.Background()))
+
+		// at the end we should still have |iters/2| branches
+ require.Equal(t, iters/2, len(sc.Stats))
+ //require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters/2, sc.kv.Len())
+ }
+}
+
+func TestStatsCacheGrowth(t *testing.T) {
+ //t.Skip("expensive test")
+
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ sc.JobInterval = 10
+ sc.gcInterval = time.Hour
+ sc.branchInterval = time.Hour
+ require.NoError(t, sc.Restart(ctx))
+
+ addBranch := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')"))
+ }
+
+ addData := func(ctx *sql.Context, i int) {
+ branchName := "branch" + strconv.Itoa(i)
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")"))
+
+ }
+
+ iters := 2000
+ if os.Getenv("CI") != "" {
+ iters = 1025
+ }
+ {
+ branches := make(chan string, iters)
+
+ go func() {
+ addCtx, _ := sc.ctxGen(context.Background())
+ for i := range iters {
+ addBranch(addCtx, i)
+ addData(addCtx, i)
+ branches <- "branch" + strconv.Itoa(i)
+ if i%500 == 0 {
+ log.Println("branches: ", strconv.Itoa(i))
+ for {
+ syncErr := executeQuery(addCtx, sqlEng, "call dolt_stats_sync()")
+ waitErr := executeQuery(addCtx, sqlEng, "call dolt_stats_wait()")
+ if waitErr == nil && syncErr == nil {
+ break
+ } else if syncErr != nil {
+ log.Println("waiting on: ", strconv.Itoa(i), syncErr.Error())
+					} else if waitErr != nil {
+ log.Println("waiting on: ", strconv.Itoa(i), waitErr.Error())
+ }
+ }
+ }
+ }
+ close(branches)
+ }()
+
+ //waitCtx, _ := sc.ctxGen(context.Background())
+ i := 0
+		for range branches {
+ //if i%50 == 0 {
+ // log.Println("branches: ", strconv.Itoa(i))
+ // require.NoError(t, executeQuery(waitCtx, sqlEng, "call dolt_stats_wait()"))
+ //}
+ i++
+ }
+
+ executeQuery(ctx, sqlEng, "call dolt_stats_wait()")
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ require.NoError(t, sc.Stop(context.Background()))
+
+		// at the end we should have |iters| branches, since none are dropped
+ require.Equal(t, iters, len(sc.Stats))
+ //require.NoError(t, sc.ValidateState(ctx))
+ require.Equal(t, iters, sc.kv.Len())
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/script_test.go b/go/libraries/doltcore/sqle/statspro/script_test.go
new file mode 100644
index 00000000000..fc0f9529cd5
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/script_test.go
@@ -0,0 +1,738 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "log"
+ "strconv"
+ "testing"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/stretchr/testify/require"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures"
+)
+
+type scriptTest struct {
+ name string
+ setup []string
+ assertions []assertion
+}
+
+type assertion struct {
+ query string
+ res []sql.Row
+ err string
+}
+
+func TestStatScripts(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ defer threads.Shutdown()
+
+ scripts := []scriptTest{
+ {
+ name: "track updates",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'zero'), (1, 'one')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ {
+ query: "update xy set y = 2 where x between 100 and 800",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ },
+ },
+ {
+ name: "track deletes",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'zero'), (1, 'one')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(9)}},
+ },
+ {
+ query: "delete from xy where x > 600",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(5)}},
+ },
+ },
+ },
+ {
+ name: "ddl table",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "truncate table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(0)}},
+ },
+ {
+ query: "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(0)}},
+ },
+ },
+ },
+ {
+ name: "ddl index",
+ setup: []string{
+ "create table xy (x int primary key, y varchar(16), key (y,x))",
+ "insert into xy values (0,'0'), (1,'0'), (2,'0')",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "alter table xy drop index y",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(1)}},
+ },
+ {
+ query: "alter table xy add index yx (y,x)",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
+ res: []sql.Row{{"varchar(16),int", "0,2"}},
+ },
+ {
+ query: "alter table xy modify column y int",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select types, upper_bound from dolt_statistics where index_name = 'yx'",
+ res: []sql.Row{{"int,int", "0,2"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ },
+ },
+ {
+ name: "mcv counts",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "alter table xy add index y2 (y)",
+ "alter table xy add index x2 (x,y)",
+ "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)",
+ },
+ assertions: []assertion{
+ {
+ query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'",
+ res: []sql.Row{{"1", "0", "4,6"}},
+ },
+ {
+ query: "select mcv_counts from dolt_statistics where index_name = 'y'",
+ res: []sql.Row{{""}},
+ },
+ {
+ query: "select mcv_counts from dolt_statistics where index_name = 'x2'",
+ res: []sql.Row{{""}},
+ },
+ },
+ },
+ {
+ name: "caps testing",
+ setup: []string{
+ "create table XY (x int primary key, Y int, key Yx (Y,x))",
+ "alter table xy add index y2 (y)",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}},
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(3)}},
+ },
+ {
+ query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(12)}},
+ },
+ {
+ query: "delete from xy where x > 500",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(6)}},
+ },
+ },
+ },
+ {
+ name: "database ddl",
+ setup: []string{
+ "create table mydb.xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "create database repo2",
+ "create table repo2.xy (x int primary key, y int, key (y,x))",
+ "insert into repo2.xy values (0,0), (1,0), (2,0)",
+ "create table repo2.ab (a int primary key, b int, key (b,a))",
+ "insert into repo2.ab values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"mydb", "xy", "primary"}, {"mydb", "xy", "y"},
+ },
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
+ {"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
+ },
+ },
+ {
+ query: "use repo2",
+ },
+ {
+ query: "select database_name, table_name, index_name from dolt_statistics order by index_name",
+ res: []sql.Row{
+ {"repo2", "ab", "b"}, {"repo2", "ab", "primary"},
+ {"repo2", "xy", "primary"}, {"repo2", "xy", "y"},
+ },
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(4)}},
+ },
+ {
+ query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(10)}},
+ },
+ {
+ query: "drop database repo2",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "use mydb",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ },
+ },
+ {
+ name: "recreate table without index",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(2)}},
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "create table xy (x int primary key, y int)",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "select count(*) from dolt_statistics",
+ res: []sql.Row{{int64(0)}},
+ },
+ },
+ },
+ {
+ name: "stats info",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_checkout('feat')",
+ },
+ {
+ query: "drop table xy",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 1,
+ GcCounter: 3,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_checkout('main')",
+ },
+ {
+ query: "call dolt_branch('-D', 'feat')",
+ },
+ {
+ query: "call dolt_stats_sync()",
+ },
+ {
+ query: "call dolt_stats_gc()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 1,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 1,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 1,
+ GcCounter: 4,
+ SyncCounter: 2,
+ }.ToJson(),
+ }},
+ },
+ },
+ },
+ {
+ name: "stats stop/start",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_stats_stop()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: false,
+ DbSeedCnt: 0,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_stats_restart()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ },
+ },
+ {
+ name: "stats purge",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "insert into xy values (3,0)",
+ },
+ {
+ query: "call dolt_checkout('feat')",
+ },
+ {
+ query: "insert into xy values (3,0)",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 4,
+ CachedBucketCnt: 4,
+ CachedBoundCnt: 4,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_stats_purge()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: false,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 0,
+ CachedBucketCnt: 0,
+ CachedBoundCnt: 0,
+ CachedTemplateCnt: 0,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_stats_restart()",
+ },
+ {
+ query: "call dolt_stats_wait()",
+ },
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ },
+ },
+ {
+ name: "stats validate",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y,x))",
+ "insert into xy values (0,0), (1,0), (2,0)",
+ "call dolt_add('-A')",
+ "call dolt_commit('-m', 'create xy')",
+ "call dolt_checkout('-b', 'feat')",
+ "call dolt_checkout('main')",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{
+ {dprocedures.StatsInfo{
+ DbCnt: 2,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 2,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 2,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson(),
+ }},
+ },
+ {
+ query: "call dolt_stats_stop()",
+ },
+ {
+ query: "create table ab (a int primary key, b int)",
+ },
+ {
+ query: "insert into ab values (0,0), (1,1), (2,2)",
+ },
+ {
+ query: "call dolt_stats_validate()",
+ err: "(mydb/main) missing template (PRIMARY/e29in)\n(mydb/main) missing bound (d9aov)\n(mydb/main) missing chunk (d9aov)\n",
+ },
+ {
+ query: "call dolt_stats_restart()",
+ },
+ {
+ query: "call dolt_stats_validate()",
+ res: []sql.Row{{"Ok"}},
+ },
+ },
+ },
+ {
+ name: "null bounds",
+ setup: []string{
+ "create table xy (x int primary key, y int, key (y))",
+ "insert into xy values (0,NULL), (1,0), (2,0)",
+ },
+ assertions: []assertion{
+ {
+ query: "call dolt_stats_info()",
+ res: []sql.Row{{dprocedures.StatsInfo{
+ DbCnt: 1,
+ ReadCnt: 0,
+ Active: true,
+ DbSeedCnt: 1,
+ StorageBucketCnt: 2,
+ CachedBucketCnt: 2,
+ CachedBoundCnt: 2,
+ CachedTemplateCnt: 2,
+ StatCnt: 1,
+ GcCounter: 1,
+ SyncCounter: 1,
+ }.ToJson()}},
+ },
+ },
+ },
+ }
+
+ for _, tt := range scripts {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx, sqlEng, sc := emptySetup(t, threads, false)
+ sc.SetEnableGc(true)
+
+ require.NoError(t, sc.Restart(ctx))
+
+ for _, s := range tt.setup {
+ require.NoError(t, executeQuery(ctx, sqlEng, s))
+ }
+
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_sync()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()"))
+
+ for i, a := range tt.assertions {
+ log.Println(a.query)
+ rows, err := executeQueryResults(ctx, sqlEng, a.query)
+			if a.err != "" {
+				require.Error(t, err, strconv.Itoa(i)+": "+a.query)
+				require.Equal(t, a.err, err.Error())
+			} else {
+				require.NoError(t, err)
+			}
+ if a.res != nil {
+ require.Equal(t, a.res, rows, strconv.Itoa(i)+": "+a.query)
+ }
+ }
+ })
+ }
+}
diff --git a/go/libraries/doltcore/sqle/statspro/seed_job.go b/go/libraries/doltcore/sqle/statspro/seed_job.go
new file mode 100644
index 00000000000..19ba2d9470d
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/seed_job.go
@@ -0,0 +1,124 @@
+// Copyright 2023 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+ "github.com/dolthub/dolt/go/store/hash"
+)
+
+// GetLatestTable will get the WORKING root table for the current database/branch
+func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) {
+ var db sqle.Database
+ switch d := sqlDb.(type) {
+ case sqle.Database:
+ db = d
+ case sqle.ReadReplicaDatabase:
+ db = d.Database
+ default:
+ return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb)
+ }
+ sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName)
+ if err != nil {
+ return nil, nil, err
+ }
+ if !ok {
+ return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName)
+ }
+
+ var dTab *doltdb.Table
+ var sqleTable *sqle.DoltTable
+ switch t := sqlTable.(type) {
+ case *sqle.AlterableDoltTable:
+ sqleTable = t.DoltTable
+ dTab, err = t.DoltTable.DoltTable(ctx)
+ case *sqle.WritableDoltTable:
+ sqleTable = t.DoltTable
+ dTab, err = t.DoltTable.DoltTable(ctx)
+ case *sqle.DoltTable:
+ sqleTable = t
+ dTab, err = t.DoltTable(ctx)
+ default:
+ err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable)
+ }
+ if err != nil {
+ return nil, nil, err
+ }
+ return sqleTable, dTab, nil
+}
+
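+// templateCacheKey identifies a statistic template by the table's schema
+// hash and the index name; any schema change yields a new key.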
+type templateCacheKey struct {
+ h hash.Hash
+ idxName string
+}
+
+func (k templateCacheKey) String() string {
+ return k.idxName + "/" + k.h.String()[:5]
+}
+
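+// getTemplate returns the cached statistic template for |sqlIdx|, or
+// builds one from the index's columns, types, index class, and functional
+// dependencies and caches it under the current schema hash.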
+func (sc *StatsCoord) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, stats.Statistic, error) {
+	schHash, _, err := sqlTable.IndexCacheKey(ctx)
+	if err != nil {
+		return templateCacheKey{}, stats.Statistic{}, err
+	}
+	key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()}
+ if template, ok := sc.kv.GetTemplate(key); ok {
+ return key, template, nil
+ }
+ fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx)
+ if err != nil {
+ return templateCacheKey{}, stats.Statistic{}, err
+ }
+
+ var class sql.IndexClass
+ switch {
+ case sqlIdx.IsSpatial():
+ class = sql.IndexClassSpatial
+ case sqlIdx.IsFullText():
+ class = sql.IndexClassFulltext
+ default:
+ class = sql.IndexClassDefault
+ }
+
+ var types []sql.Type
+ for _, cet := range sqlIdx.ColumnExpressionTypes() {
+ types = append(types, cet.Type)
+ }
+
+ tablePrefix := sqlTable.Name() + "."
+ cols := make([]string, len(sqlIdx.Expressions()))
+ for i, c := range sqlIdx.Expressions() {
+ cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix)
+ }
+
+ template := stats.Statistic{
+ Cols: cols,
+ Typs: types,
+ IdxClass: uint8(class),
+ Fds: fds,
+ Colset: colset,
+ }
+
+ // We put template twice, once for schema changes with no data
+ // changes (here), and once when we put chunks to avoid GC dropping
+ // templates before the finalize job.
+ sc.kv.PutTemplate(key, template)
+
+ return key, template, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/sender.go b/go/libraries/doltcore/sqle/statspro/sender.go
new file mode 100644
index 00000000000..37fbf3f59a0
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/sender.go
@@ -0,0 +1,315 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"strings"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/stats"
+
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
+	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
+	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
+	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+	"github.com/dolthub/dolt/go/store/prolly"
+	"github.com/dolthub/dolt/go/store/prolly/tree"
+	"github.com/dolthub/dolt/go/store/val"
+)
+
+// This file implements the stats sender: a background thread that walks
+// the full root, visiting every database, branch, and table to refresh
+// statistics.
+//
+// TODO: decide whether work throughput is controlled on the sender or the
+// receiver side.
+
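+// newCycle cancels any in-flight sender cycle and installs a fresh child
+// context for the next root walk.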
+func (sc *StatsCoord) newCycle(ctx context.Context) context.Context {
+ sc.cycleMu.Lock()
+ defer sc.cycleMu.Unlock()
+ if sc.cycleCancel != nil {
+ sc.cycleCancel()
+ }
+ sc.cycleCtx, sc.cycleCancel = context.WithCancel(ctx)
+ return sc.cycleCtx
+}
+
+func (sc *StatsCoord) cancelSender() {
+ sc.cycleMu.Lock()
+ defer sc.cycleMu.Unlock()
+ if sc.cycleCancel != nil {
+ sc.cycleCancel()
+ sc.cycleCancel = nil
+ }
+}
+
+func (sc *StatsCoord) getCycleWaiter() <-chan struct{} {
+ sc.cycleMu.Lock()
+ defer sc.cycleMu.Unlock()
+ return sc.cycleCtx.Done()
+}
+
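+// runSender computes statistics for the current root and publishes them to
+// the shared Stats map. Each pass runs under a cancelable cycle context so
+// callers can interrupt a walk and request a new one.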
+func (sc *StatsCoord) runSender(ctx context.Context) (err error) {
+ sc.senderDone = make(chan struct{})
+ defer func() {
+ close(sc.senderDone)
+ }()
+ for {
+ cycleCtx := sc.newCycle(ctx)
+
+ sqlCtx, err := sc.ctxGen(cycleCtx)
+ if err != nil {
+ return err
+ }
+
+		newStats, err := sc.newStatsForRoot(sqlCtx)
+		if err != nil {
+			// a failed walk returns a nil map; keep the previous stats
+			sc.descError("", err)
+		} else {
+			sc.statsMu.Lock()
+			sc.Stats = newStats
+			sc.statsMu.Unlock()
+		}
+
+		// block until this cycle is canceled, then report why
+		<-cycleCtx.Done()
+		return context.Cause(cycleCtx)
+ }
+}
+
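+// newStatsForRoot collects fresh statistics for every table on every
+// branch of every database visible to the session, keyed by
+// (db, branch, table).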
+func (sc *StatsCoord) newStatsForRoot(ctx *sql.Context) (map[tableIndexesKey][]*stats.Statistic, error) {
+ var err error
+ dSess := dsess.DSessFromSess(ctx.Session)
+ dbs := dSess.Provider().AllDatabases(ctx)
+ newStats := make(map[tableIndexesKey][]*stats.Statistic)
+ for _, db := range dbs {
+ sqlDb, ok := db.(sqle.Database)
+ if !ok {
+ continue
+ }
+
+ var branches []ref.DoltRef
+ if err := sc.sq.DoSync(ctx, func() {
+			ddb, ok := dSess.GetDoltDB(ctx, db.Name())
+			if !ok {
+				sc.descError("dolt database not found "+db.Name(), nil)
+				return
+			}
+ branches, err = ddb.GetBranches(ctx)
+ if err != nil {
+ sc.descError("getBranches", err)
+ }
+ }); err != nil {
+ return nil, err
+ }
+
+ for _, br := range branches {
+ sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), br.GetPath(), br.GetPath()+"/"+sqlDb.AliasedName())
+ if err != nil {
+ sc.descError("revisionForBranch", err)
+ continue
+ }
+
+ var tableNames []string
+ if err := sc.sq.DoSync(ctx, func() {
+ tableNames, err = sqlDb.GetTableNames(ctx)
+ if err != nil {
+ sc.descError("getTableNames", err)
+ }
+ }); err != nil {
+ return nil, err
+ }
+
+ for _, tableName := range tableNames {
+ tableKey, newTableStats, err := sc.updateTable(ctx, tableName, sqlDb)
+ if err != nil {
+ return nil, err
+ }
+ newStats[tableKey] = newTableStats
+ }
+ }
+ }
+
+ return newStats, nil
+}
+
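+// finalizeHistogram stitches a list of buckets into a statistic built from
+// |template|, accumulating row, distinct, and null counts along the way.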
+func (sc *StatsCoord) finalizeHistogram(template stats.Statistic, buckets []*stats.Bucket, firstBound sql.Row) *stats.Statistic {
+ template.LowerBnd = firstBound
+ for _, b := range buckets {
+ // accumulate counts
+ template.RowCnt += b.RowCnt
+ template.DistinctCnt += b.DistinctCnt
+ template.NullCnt += b.NullCnt
+ template.Hist = append(template.Hist, b)
+ }
+ return &template
+}
+
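+// collectIndexNodes returns one histogram bucket per level node, reusing
+// buckets and lower bounds cached by chunk hash so only chunks that
+// changed since the last pass are scanned.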
+func (sc *StatsCoord) collectIndexNodes(ctx *sql.Context, prollyMap prolly.Map, idxLen int, nodes []tree.Node) ([]*stats.Bucket, sql.Row, error) {
+ updater := newBucketBuilder(sql.StatQualifier{}, idxLen, prollyMap.KeyDesc())
+ keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen))
+
+ firstNodeHash := nodes[0].HashOf()
+	lowerBound, ok := sc.kv.GetBound(firstNodeHash, idxLen)
+	if !ok {
+		if err := sc.sq.DoSync(ctx, func() {
+			var err error
+			lowerBound, err = firstRowForIndex(ctx, prollyMap, keyBuilder)
+			if err != nil {
+				sc.descError("get lower bound for index", err)
+				return
+			}
+			if sc.Debug {
+				log.Printf("put bound: %s: %v\n", firstNodeHash.String()[:5], lowerBound)
+			}
+
+			sc.kv.PutBound(firstNodeHash, lowerBound, idxLen)
+		}); err != nil {
+			return nil, nil, err
+		}
+	}
+
+ var offset uint64
+ var buckets []*stats.Bucket
+	for _, n := range nodes {
+		treeCnt, err := n.TreeCount()
+		if err != nil {
+			return nil, nil, err
+		}
+
+		if b, ok, err := sc.kv.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil {
+			return nil, nil, err
+		} else if ok {
+			// reuse the cached bucket, but still advance the ordinal
+			// offset past this chunk
+			buckets = append(buckets, b)
+			offset += uint64(treeCnt)
+			continue
+		}
+
+ err = sc.sq.DoSync(ctx, func() {
+ updater.newBucket()
+
+ // we read exclusive range [node first key, next node first key)
+ start, stop := offset, offset+uint64(treeCnt)
+ iter, err := prollyMap.IterOrdinalRange(ctx, start, stop)
+ if err != nil {
+ sc.descError("get histogram bucket for node", err)
+ return
+ }
+ for {
+ // stats key will be a prefix of the index key
+ keyBytes, _, err := iter.Next(ctx)
+ if errors.Is(err, io.EOF) {
+ break
+ } else if err != nil {
+ sc.descError("get histogram bucket for node", err)
+ return
+ }
+ // build full key
+ for i := range keyBuilder.Desc.Types {
+ keyBuilder.PutRaw(i, keyBytes.GetField(i))
+ }
+
+ updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen))
+ keyBuilder.Recycle()
+ }
+
+ // finalize the aggregation
+ newBucket, err := updater.finalize(ctx, prollyMap.NodeStore())
+ if err != nil {
+ sc.descError("get histogram bucket for node", err)
+ return
+ }
+ err = sc.kv.PutBucket(ctx, n.HashOf(), newBucket, keyBuilder)
+ if err != nil {
+ sc.descError("get histogram bucket for node", err)
+ return
+ }
+ buckets = append(buckets, newBucket)
+ })
+ if err != nil {
+ return nil, nil, err
+ }
+ offset += uint64(treeCnt)
+ }
+
+ return buckets, lowerBound, nil
+}
+
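+// updateTable rebuilds statistics for every index on |tableName|, pairing
+// a cached template with histogram buckets collected from the index's
+// current chunk tree.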
+func (sc *StatsCoord) updateTable(ctx *sql.Context, tableName string, sqlDb dsess.SqlDatabase) (tableIndexesKey, []*stats.Statistic, error) {
+ var err error
+ var sqlTable *sqle.DoltTable
+ var dTab *doltdb.Table
+ if err := sc.sq.DoSync(ctx, func() {
+ sqlTable, dTab, err = GetLatestTable(ctx, tableName, sqlDb)
+ if err != nil {
+ sc.descError("GetLatestTable", err)
+ }
+ }); err != nil {
+ return tableIndexesKey{}, nil, err
+ }
+
+ tableKey := tableIndexesKey{
+ db: sqlDb.AliasedName(),
+ branch: sqlDb.Revision(),
+ table: tableName,
+ schema: "",
+ }
+
+ var indexes []sql.Index
+ if err := sc.sq.DoSync(ctx, func() {
+ indexes, err = sqlTable.GetIndexes(ctx)
+ if err != nil {
+ sc.descError("", err)
+ }
+ }); err != nil {
+ return tableIndexesKey{}, nil, err
+ }
+
+ var newTableStats []*stats.Statistic
+ for _, sqlIdx := range indexes {
+ var idx durable.Index
+ var err error
+ if strings.EqualFold(sqlIdx.ID(), "PRIMARY") {
+ idx, err = dTab.GetRowData(ctx)
+ } else {
+ idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID())
+ }
+ if err != nil {
+ sc.descError("GetRowData", err)
+ continue
+ }
+
+ var template stats.Statistic
+ if err := sc.sq.DoSync(ctx, func() {
+ _, template, err = sc.getTemplate(ctx, sqlTable, sqlIdx)
+ if err != nil {
+				sc.descError("", fmt.Errorf("stats collection failed to generate a statistic template: %s.%s.%s (%T); %s", sqlDb.RevisionQualifiedName(), tableName, sqlIdx.ID(), sqlIdx, err))
+			}
+		}); err != nil {
+			return tableIndexesKey{}, nil, err
+		} else if template.Fds.Empty() {
+			return tableIndexesKey{}, nil, fmt.Errorf("failed to create template for %s/%s/%s/%s", sqlDb.Revision(), sqlDb.AliasedName(), tableName, sqlIdx.ID())
+ }
+
+ idxLen := len(sqlIdx.Expressions())
+
+ prollyMap := durable.ProllyMapFromIndex(idx)
+ var levelNodes []tree.Node
+ if err := sc.sq.DoSync(ctx, func() {
+ levelNodes, err = tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt)
+ if err != nil {
+ sc.descError("", err)
+ }
+ }); err != nil {
+ return tableIndexesKey{}, nil, err
+ }
+ var buckets []*stats.Bucket
+ var firstBound sql.Row
+ if len(levelNodes) > 0 {
+ buckets, firstBound, err = sc.collectIndexNodes(ctx, prollyMap, idxLen, levelNodes)
+ if err != nil {
+ sc.descError("", err)
+ continue
+ }
+ }
+ newTableStats = append(newTableStats, sc.finalizeHistogram(template, buckets, firstBound))
+ }
+ return tableKey, newTableStats, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv.go b/go/libraries/doltcore/sqle/statspro/stats_kv.go
new file mode 100644
index 00000000000..b24492597d3
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/stats_kv.go
@@ -0,0 +1,556 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+ "github.com/dolthub/go-mysql-server/sql/types"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/schema"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/prolly"
+ "github.com/dolthub/dolt/go/store/prolly/tree"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
+var ErrIncompatibleVersion = errors.New("client stats version mismatch")
+
+const defaultBucketSize = 1024 // must be > 0 to avoid panic
+
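+// StatsKv stores the building blocks for statistics: histogram buckets and
+// lower bounds keyed by chunk hash plus key prefix length, and statistic
+// templates keyed by schema hash plus index name. GC is mark-and-sweep:
+// StartGc allocates fresh maps, gets and puts during the GC window copy
+// live entries forward (MarkBucket does this explicitly for buckets), and
+// FinishGc swaps the new maps in.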
+type StatsKv interface {
+ PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error
+ GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error)
+ GetTemplate(key templateCacheKey) (stats.Statistic, bool)
+ PutTemplate(key templateCacheKey, stat stats.Statistic)
+ GetBound(h hash.Hash, len int) (sql.Row, bool)
+ PutBound(h hash.Hash, r sql.Row, l int)
+ Flush(ctx context.Context) (int, error)
+ StartGc(ctx context.Context, sz int) error
+ MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error
+ FinishGc(context.Context) error
+ Len() int
+}
+
+var _ StatsKv = (*prollyStats)(nil)
+var _ StatsKv = (*memStats)(nil)
+
+func NewMemStats() *memStats {
+ return &memStats{
+ mu: sync.Mutex{},
+ buckets: make(map[bucketKey]*stats.Bucket),
+ templates: make(map[templateCacheKey]stats.Statistic),
+ bounds: make(map[bucketKey]sql.Row),
+ }
+}
+
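+// memStats is the in-memory StatsKv implementation. While doGc is set,
+// gets and puts are mirrored into the next* maps so live entries survive
+// the map swap in FinishGc.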
+type memStats struct {
+ mu sync.Mutex
+ doGc bool
+
+	buckets map[bucketKey]*stats.Bucket
+ nextBuckets map[bucketKey]*stats.Bucket
+
+ templates map[templateCacheKey]stats.Statistic
+ nextTemplates map[templateCacheKey]stats.Statistic
+
+ bounds map[bucketKey]sql.Row
+ nextBounds map[bucketKey]sql.Row
+
+ epochCnt int
+}
+
+func (m *memStats) StorageCnt(context.Context) (int, error) {
+ return 0, nil
+}
+
+func (m *memStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ t, ok := m.templates[key]
+ if !ok {
+ return stats.Statistic{}, false
+ }
+ if m.doGc {
+ m.nextTemplates[key] = t
+ }
+ return t, true
+}
+
+func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.templates[key] = stat
+ if m.doGc {
+ m.nextTemplates[key] = stat
+ }
+}
+
+type bucketKey [22]byte
+
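+// getBucketKey packs the 20-byte chunk hash and the big-endian uint16 key
+// prefix length into a fixed-size map key, so one chunk can carry distinct
+// buckets for different prefix widths.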
+func getBucketKey(h hash.Hash, l int) bucketKey {
+ var k bucketKey
+ copy(k[:hash.ByteLen], h[:])
+ binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l))
+ return k
+}
+
+func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, l)
+ r, ok := m.bounds[k]
+ if !ok {
+ return nil, false
+ }
+ if m.doGc {
+ m.nextBounds[k] = r
+ }
+ return r, true
+}
+
+func (m *memStats) PutBound(h hash.Hash, r sql.Row, l int) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ k := getBucketKey(h, l)
+ m.bounds[k] = r
+ if m.doGc {
+ m.nextBounds[k] = r
+ }
+}
+
+func (m *memStats) StartGc(ctx context.Context, sz int) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.doGc = true
+ if sz == 0 {
+ sz = len(m.buckets) * 2
+	m.nextBuckets = make(map[bucketKey]*stats.Bucket, sz)
+ }
+ m.nextBounds = make(map[bucketKey]sql.Row)
+ m.nextTemplates = make(map[templateCacheKey]stats.Statistic)
+ return nil
+}
+
+func (m *memStats) RestartEpoch() {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.epochCnt = 0
+}
+
+func (m *memStats) FinishGc(context.Context) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ m.buckets = m.nextBuckets
+ m.templates = m.nextTemplates
+ m.bounds = m.nextBounds
+ m.nextBuckets = nil
+ m.nextTemplates = nil
+ m.nextBounds = nil
+ m.doGc = false
+ return nil
+}
+
+func (m *memStats) Len() int {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ return len(m.buckets)
+}
+
+func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	k := getBucketKey(h, len(b.BoundVal))
+	m.buckets[k] = b
+	if m.doGc {
+		// keep buckets written mid-GC alive through the map swap
+		m.nextBuckets[k] = b
+	}
+	return nil
+}
+
+func (m *memStats) MarkBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if !m.doGc {
+		// marking is only meaningful while a GC is in progress
+		return nil
+	}
+	k := getBucketKey(h, tupB.Desc.Count())
+	if b, ok := m.buckets[k]; ok {
+		m.nextBuckets[k] = b
+	}
+	return nil
+}
+
+func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+ if h.IsEmpty() {
+ return nil, false, nil
+ }
+ k := getBucketKey(h, tupB.Desc.Count())
+ b, ok := m.buckets[k]
+ return b, ok, nil
+}
+
+func (m *memStats) Flush(_ context.Context) (int, error) {
+ return 0, nil
+}
+
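+// NewProllyStats returns a disk-backed StatsKv whose buckets persist in a
+// prolly map stored in |destDb|'s node store.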
+func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) {
+ sch := schema.StatsTableDoltSchema
+ kd, vd := sch.GetMapDescriptors()
+
+ keyBuilder := val.NewTupleBuilder(kd)
+ valueBuilder := val.NewTupleBuilder(vd)
+ newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return nil, err
+ }
+
+ return &prollyStats{
+ mu: sync.Mutex{},
+ destDb: destDb,
+ kb: keyBuilder,
+ vb: valueBuilder,
+ m: newMap.Mutate(),
+ mem: NewMemStats(),
+ }, nil
+}
+
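+// prollyStats layers a durable prolly map beneath an in-memory cache:
+// puts write through to both, and reads that miss the cache fall back to
+// disk and repopulate it.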
+type prollyStats struct {
+ mu sync.Mutex
+ destDb dsess.SqlDatabase
+ kb, vb *val.TupleBuilder
+ m *prolly.MutableMap
+ newM *prolly.MutableMap
+ mem *memStats
+}
+
+func (p *prollyStats) Len() int {
+ return p.mem.Len()
+}
+
+func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) {
+ return p.mem.GetTemplate(key)
+}
+
+func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) {
+ p.mem.PutTemplate(key, stat)
+}
+
+func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) {
+ return p.mem.GetBound(h, l)
+}
+
+func (p *prollyStats) PutBound(h hash.Hash, r sql.Row, l int) {
+ p.mem.PutBound(h, r, l)
+}
+
+func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error {
+ if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil {
+ return err
+ }
+
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return err
+ }
+ v, err := p.encodeBucket(ctx, b, tupB)
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ return p.m.Put(ctx, k, v)
+}
+
+func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) {
+ if h.IsEmpty() {
+ return nil, false, nil
+ }
+ b, ok, err := p.mem.GetBucket(ctx, h, tupB)
+ if err != nil {
+ return nil, false, err
+ }
+ if ok {
+ return b, true, nil
+ }
+
+	// not cached in memory; fall back to disk
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return nil, false, err
+ }
+
+ var v val.Tuple
+ err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
+ if key != nil {
+ ok = true
+ v = value
+ }
+ return nil
+ })
+ if !ok || err != nil {
+ return nil, false, err
+ }
+
+	if tupB == nil {
+		// without a tuple builder we can only report that the bucket
+		// exists on disk, not decode it
+		return nil, true, nil
+	}
+
+ b, err = p.decodeBucketTuple(ctx, v, tupB)
+ if err != nil {
+ return nil, false, err
+ }
+
+	if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil {
+		return nil, false, err
+	}
+ return b, true, nil
+}
+
+func (p *prollyStats) Flush(ctx context.Context) (int, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ flushedMap, err := p.m.Map(ctx)
+ if err != nil {
+ return 0, err
+ }
+ if err := p.destDb.DbData().Ddb.SetStatistics(ctx, "main", flushedMap.HashOf()); err != nil {
+ return 0, err
+ }
+
+ cnt, err := flushedMap.Count()
+ return cnt, err
+}
+
+func (p *prollyStats) StartGc(ctx context.Context, sz int) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ if err := p.mem.StartGc(ctx, sz); err != nil {
+ return err
+ }
+ kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
+ newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return err
+ }
+ p.newM = newMap.Mutate()
+
+ return nil
+}
+
+func (p *prollyStats) MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error {
+	if err := p.mem.MarkBucket(ctx, h, tupB); err != nil {
+		return err
+	}
+
+ // try disk
+ k, err := p.encodeHash(h, tupB.Desc.Count())
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ var v val.Tuple
+ var ok bool
+ err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error {
+ if key != nil {
+ ok = true
+ v = value
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ if !ok {
+ return nil
+ }
+
+ return p.newM.Put(ctx, k, v)
+}
+
+func (p *prollyStats) FinishGc(ctx context.Context) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if err := p.mem.FinishGc(ctx); err != nil {
+		return err
+	}
+	m, err := p.newM.Map(context.Background())
+ if err != nil {
+ return err
+ }
+ p.m = m.Mutate()
+ p.newM = nil
+
+ return nil
+}
+
+func (p *prollyStats) encodeHash(h hash.Hash, len int) (val.Tuple, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+ p.kb.PutInt64(0, int64(len))
+ if err := p.kb.PutString(1, h.String()); err != nil {
+ return nil, err
+ }
+ return p.kb.Build(p.m.NodeStore().Pool()), nil
+}
+
+func (p *prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) {
+	l, ok := p.kb.Desc.GetInt64(0, v)
+	if !ok {
+		return 0, hash.Hash{}, fmt.Errorf("unexpected null prefix length")
+	}
+	hStr, ok := p.kb.Desc.GetString(1, v)
+	if !ok {
+		return 0, hash.Hash{}, fmt.Errorf("unexpected null hash")
+	}
+	return int(l), hash.Parse(hStr), nil
+}
+
+func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) {
+ var row []interface{}
+ for i := 0; i < p.vb.Desc.Count(); i++ {
+ f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore())
+ if err != nil {
+ return nil, err
+ }
+ row = append(row, f)
+ }
+
+	version, ok := row[0].(int64)
+	if !ok || version != schema.StatsVersion {
+		return nil, fmt.Errorf("%w: write version %v does not match read version %d", ErrIncompatibleVersion, row[0], schema.StatsVersion)
+	}
+ rowCount := row[1].(int64)
+ distinctCount := row[2].(int64)
+ nullCount := row[3].(int64)
+ boundRowStr := row[4].(string)
+ upperBoundCnt := row[5].(int64)
+ mcvCountsStr := row[10].(string)
+
+ boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB)
+ if err != nil {
+ return nil, err
+ }
+
+ var mcvCnts []uint64
+ if len(mcvCountsStr) > 0 {
+ for _, c := range strings.Split(mcvCountsStr, ",") {
+ cnt, err := strconv.ParseInt(c, 10, 64)
+ if err != nil {
+ return nil, err
+ }
+ mcvCnts = append(mcvCnts, uint64(cnt))
+ }
+ }
+
+ mcvs := make([]sql.Row, 4)
+ for i, v := range row[6:10] {
+ if v != nil && v != "" {
+ row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB)
+ if err != nil {
+ return nil, err
+ }
+ mcvs[i] = row
+ }
+ }
+
+ return &stats.Bucket{
+ RowCnt: uint64(rowCount),
+ DistinctCnt: uint64(distinctCount),
+ NullCnt: uint64(nullCount),
+ McvsCnt: mcvCnts,
+ BoundCnt: uint64(upperBoundCnt),
+ BoundVal: boundRow,
+ McvVals: mcvs,
+ }, nil
+}
+
+var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16}
+
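+// encodeBucket serializes a bucket as (version, row count, distinct count,
+// null count, bound row, bound count, four mcv rows, mcv counts), the same
+// field order that decodeBucketTuple reads back.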
+func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ p.vb.PutInt64(0, schema.StatsVersion)
+ p.vb.PutInt64(1, int64(b.RowCount()))
+ p.vb.PutInt64(2, int64(b.DistinctCount()))
+ p.vb.PutInt64(3, int64(b.NullCount()))
+ boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB)
+ if err != nil {
+ return nil, err
+ }
+	if err := p.vb.PutString(4, string(boundRow)); err != nil {
+		return nil, err
+	}
+	p.vb.PutInt64(5, int64(b.BoundCount()))
+	for i, r := range b.Mcvs() {
+		mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB)
+		if err != nil {
+			return nil, err
+		}
+		if err := p.vb.PutString(6+i, string(mcvRow)); err != nil {
+			return nil, err
+		}
+	}
+	var mcvCntsRow sql.Row
+	for _, v := range b.McvCounts() {
+		mcvCntsRow = append(mcvCntsRow, int(v))
+	}
+	if err := p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)])); err != nil {
+		return nil, err
+	}
+
+ return p.vb.Build(p.m.NodeStore().Pool()), nil
+}
+
+func (p *prollyStats) NewEmpty(ctx context.Context) (StatsKv, error) {
+ kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors()
+ newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd)
+ if err != nil {
+ return nil, err
+ }
+ m := newMap.Mutate()
+	return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb, mem: NewMemStats()}, nil
+}
+
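+// EncodeRow serializes |r| into a tuple using |tb|'s descriptor; nil
+// fields are left unset so they round-trip as NULL.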
+func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) {
+ for i := range tb.Desc.Count() {
+ v := r[i]
+ if v == nil {
+ continue
+ }
+ if err := tree.PutField(ctx, ns, tb, i, v); err != nil {
+ return nil, err
+ }
+ }
+ return tb.Build(ns.Pool()), nil
+}
+
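+// DecodeRow is the inverse of EncodeRow, decoding one field per descriptor
+// column from the serialized tuple.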
+func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) {
+ tup := []byte(s)
+ r := make(sql.Row, tb.Desc.Count())
+ var err error
+	for i := range r {
+ r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return r, nil
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv_test.go b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go
new file mode 100644
index 00000000000..94907998137
--- /dev/null
+++ b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go
@@ -0,0 +1,231 @@
+// Copyright 2025 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statspro
+
+import (
+ "context"
+ "strconv"
+ "strings"
+ "testing"
+
+ "github.com/dolthub/go-mysql-server/sql"
+ "github.com/dolthub/go-mysql-server/sql/stats"
+ "github.com/stretchr/testify/require"
+
+ "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
+ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
+ "github.com/dolthub/dolt/go/store/hash"
+ "github.com/dolthub/dolt/go/store/val"
+)
+
+func TestProllyKv(t *testing.T) {
+ threads := sql.NewBackgroundThreads()
+ prollyKv := newTestProllyKv(t, threads)
+
+ h := hash.Parse(strings.Repeat("a", hash.StringLen))
+ h2 := hash.Parse(strings.Repeat("b", hash.StringLen))
+ k := getBucketKey(h, 2)
+
+ tupB := val.NewTupleBuilder(val.NewTupleDescriptor(
+ val.Type{Enc: val.Int64Enc, Nullable: true},
+ val.Type{Enc: val.StringEnc, Nullable: true},
+ ))
+
+ t.Run("test bounds", func(t *testing.T) {
+ exp := sql.Row{1, 1}
+ prollyKv.PutBound(h, exp, 2)
+ cmp, ok := prollyKv.GetBound(h, 2)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ _, ok = prollyKv.GetBound(h2, 2)
+ require.False(t, ok)
+ })
+
+ t.Run("test templates", func(t *testing.T) {
+ exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
+ key := templateCacheKey{
+ h: h,
+ idxName: "PRIMARY",
+ }
+ prollyKv.PutTemplate(key, exp)
+ cmp, ok := prollyKv.GetTemplate(key)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ key2 := templateCacheKey{
+ h: h2,
+ idxName: "PRIMARY",
+ }
+ _, ok = prollyKv.GetTemplate(key2)
+ require.False(t, ok)
+ })
+
+ t.Run("test buckets", func(t *testing.T) {
+ exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
+ require.NoError(t, err)
+ cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ _, ok, err = prollyKv.GetBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+ require.False(t, ok)
+
+ // delete from memory, should pull from disk when |tupB| supplied
+ delete(prollyKv.mem.buckets, k)
+
+ cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp, cmp)
+
+ cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp.RowCnt, cmp.RowCnt)
+ require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt)
+ require.Equal(t, exp.NullCnt, cmp.NullCnt)
+ require.Equal(t, exp.McvsCnt, cmp.McvsCnt)
+ require.Equal(t, exp.McvVals[0], cmp.McvVals[0])
+ require.Equal(t, exp.McvVals[1], cmp.McvVals[1])
+ require.Equal(t, exp.McvVals[2], cmp.McvVals[2])
+ require.Equal(t, exp.McvVals[3], cmp.McvVals[3])
+ require.Equal(t, exp.BoundVal, cmp.BoundVal)
+ require.Equal(t, exp.BoundCnt, cmp.BoundCnt)
+ })
+
+ t.Run("test bucket GC", func(t *testing.T) {
+ exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err := prollyKv.PutBucket(context.Background(), h, exp, tupB)
+ require.NoError(t, err)
+
+ exp2 := stats.NewHistogramBucket(10, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ err = prollyKv.PutBucket(context.Background(), h2, exp2, tupB)
+ require.NoError(t, err)
+
+		require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+ err = prollyKv.MarkBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ err = prollyKv.MarkBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+
+		require.NoError(t, prollyKv.FinishGc(context.Background()))
+
+		m, err := prollyKv.m.Map(context.Background())
+		require.NoError(t, err)
+		iter, err := m.IterAll(context.Background())
+		require.NoError(t, err)
+		// iterate one past the two marked buckets so the final branch
+		// actually runs and observes the exhausted iterator
+		for i := range 3 {
+			k, _, err := iter.Next(context.Background())
+			if i == 0 {
+				require.Equal(t, "( 2, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa )", prollyKv.kb.Desc.Format(k))
+			} else if i == 1 {
+				require.Equal(t, "( 2, bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb )", prollyKv.kb.Desc.Format(k))
+			} else if i == 2 {
+				require.Error(t, err)
+			}
+		}
+
+		require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+		err = prollyKv.MarkBucket(context.Background(), h2, tupB)
+		require.NoError(t, err)
+		require.NoError(t, prollyKv.FinishGc(context.Background()))
+
+ cmp2, ok, err := prollyKv.GetBucket(context.Background(), h2, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ require.Equal(t, exp2.BoundCount(), cmp2.BoundCnt)
+ // only tagged one bucket
+ require.Equal(t, 1, prollyKv.Len())
+ })
+
+ t.Run("test overflow", func(t *testing.T) {
+		require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+		require.NoError(t, prollyKv.FinishGc(context.Background()))
+
+ expLen := 2000
+ var expected []hash.Hash
+ for i := range expLen {
+ exp := stats.NewHistogramBucket(uint64(i), 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket)
+ nh := strconv.AppendInt(nil, int64(i), 10)
+ nh = append(nh, h[:hash.ByteLen-len(nh)]...)
+ newH := hash.New(nh)
+ expected = append(expected, newH)
+ err := prollyKv.PutBucket(context.Background(), newH, exp, tupB)
+ require.NoError(t, err)
+ }
+
+ for _, h := range expected {
+ _, ok, err := prollyKv.GetBucket(context.Background(), h, tupB)
+ require.NoError(t, err)
+ require.True(t, ok)
+ }
+
+ require.Equal(t, expLen, prollyKv.Len())
+ })
+
+ t.Run("test bounds GC", func(t *testing.T) {
+ exp := sql.Row{1, 1}
+ prollyKv.PutBound(h, exp, 2)
+ prollyKv.PutBound(h2, exp, 2)
+
+		require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+		prollyKv.GetBound(h2, 2)
+		require.NoError(t, prollyKv.FinishGc(context.Background()))
+
+ require.Equal(t, 1, len(prollyKv.mem.bounds))
+ })
+
+ t.Run("test templates GC", func(t *testing.T) {
+ exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}}
+ key := templateCacheKey{
+ h: h,
+ idxName: "PRIMARY",
+ }
+ key2 := templateCacheKey{
+ h: h2,
+ idxName: "PRIMARY",
+ }
+ prollyKv.PutTemplate(key, exp)
+ prollyKv.PutTemplate(key2, exp)
+
+		require.NoError(t, prollyKv.StartGc(context.Background(), 10))
+		prollyKv.GetTemplate(key2)
+		require.NoError(t, prollyKv.FinishGc(context.Background()))
+
+ require.Equal(t, 1, len(prollyKv.mem.templates))
+ })
+
+}
+
+func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats {
+ dEnv := dtestutils.CreateTestEnv()
+
+ sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads)
+ ctx.Session.SetClient(sql.Client{
+ User: "billy boy",
+ Address: "bigbillie@fake.horse",
+ })
+ require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb"))
+ require.NoError(t, executeQuery(ctx, sqlEng, "use mydb"))
+
+ startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx)
+
+ kv, err := NewProllyStats(ctx, startDbs[0].(dsess.SqlDatabase))
+ require.NoError(t, err)
+
+ return kv
+}
diff --git a/go/libraries/doltcore/sqle/statspro/stats_provider.go b/go/libraries/doltcore/sqle/statspro/stats_provider.go
deleted file mode 100644
index 573e20b638a..00000000000
--- a/go/libraries/doltcore/sqle/statspro/stats_provider.go
+++ /dev/null
@@ -1,535 +0,0 @@
-// Copyright 2023 Dolthub, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package statspro
-
-import (
- "context"
- "errors"
- "fmt"
- "path/filepath"
- "strings"
- "sync"
-
- "github.com/dolthub/go-mysql-server/sql"
-
- "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
- "github.com/dolthub/dolt/go/libraries/doltcore/env"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
- "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
- "github.com/dolthub/dolt/go/store/hash"
- "github.com/dolthub/dolt/go/store/prolly/tree"
-)
-
-var ErrFailedToLoad = errors.New("failed to load statistics")
-
-type indexMeta struct {
- qual sql.StatQualifier
- cols []string
- newNodes []tree.Node
- // updateOrdinals are [start, stop] tuples for each update chunk
- updateOrdinals []updateOrdinal
- keepChunks []sql.HistogramBucket
- dropChunks []sql.HistogramBucket
- allAddrs []hash.Hash
-}
-
-type updateOrdinal struct {
- start, stop uint64
-}
-
-func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider {
- return &Provider{
- pro: pro,
- sf: sf,
- mu: &sync.Mutex{},
- statDbs: make(map[string]Database),
- autoCtxCancelers: make(map[string]context.CancelFunc),
- analyzeCtxCancelers: make(map[string]context.CancelFunc),
- status: make(map[string]string),
- lockedTables: make(map[string]bool),
- }
-}
-
-// Provider is the engine interface for reading and writing index statistics.
-// Each database has its own statistics table that all tables/indexes in a db
-// share.
-type Provider struct {
- mu *sync.Mutex
- pro *sqle.DoltDatabaseProvider
- sf StatsFactory
- statDbs map[string]Database
- autoCtxCancelers map[string]context.CancelFunc
- analyzeCtxCancelers map[string]context.CancelFunc
- starter sqle.InitDatabaseHook
- status map[string]string
- lockedTables map[string]bool
-}
-
-// each database has one statistics table that is a collection of the
-// table stats in the database
-type dbToStats struct {
- mu *sync.Mutex
- dbName string
- stats map[sql.StatQualifier]*DoltStats
- statsDatabase Database
- latestTableHashes map[string]hash.Hash
-}
-
-func newDbStats(dbName string) *dbToStats {
- return &dbToStats{
- mu: &sync.Mutex{},
- dbName: dbName,
- stats: make(map[sql.StatQualifier]*DoltStats),
- latestTableHashes: make(map[string]hash.Hash),
- }
-}
-
-var _ sql.StatsProvider = (*Provider)(nil)
-
-func (p *Provider) Close() error {
- var lastErr error
- for _, db := range p.statDbs {
- if err := db.Close(); err != nil {
- lastErr = err
- }
- }
- return lastErr
-}
-
-func (p *Provider) TryLockForUpdate(branch, db, table string) bool {
- p.mu.Lock()
- defer p.mu.Unlock()
- lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
- if ok := p.lockedTables[lockId]; ok {
- return false
- }
- p.lockedTables[lockId] = true
- return true
-}
-
-func (p *Provider) UnlockTable(branch, db, table string) {
- p.mu.Lock()
- defer p.mu.Unlock()
- lockId := fmt.Sprintf("%s.%s.%s", branch, db, table)
- p.lockedTables[lockId] = false
- return
-}
-
-func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error {
- err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db)
-
- if err != nil {
- p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error()))
- return err
- }
- p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name))
- return nil
-}
-
-func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) {
- p.starter = hook
-}
-
-func (p *Provider) CancelRefreshThread(dbName string) {
- p.mu.Lock()
- if cancel, ok := p.autoCtxCancelers[dbName]; ok {
- cancel()
- }
- p.mu.Unlock()
- p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName))
-
-}
-
-func (p *Provider) ThreadStatus(dbName string) string {
- p.mu.Lock()
- defer p.mu.Unlock()
-
- if msg, ok := p.status[dbName]; ok {
- return msg
- }
- return "no active stats thread"
-}
-
-func (p *Provider) TrackedBranches(dbName string) []string {
- db, ok := p.getStatDb(dbName)
- if !ok {
- return nil
- }
- return db.Branches()
-
-}
-
-func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) {
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil, nil
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name())
-}
-
-func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) {
- statDb, ok := p.getStatDb(db)
- if !ok || statDb == nil {
- return nil, nil
- }
-
- if branch == "" {
- dSess := dsess.DSessFromSess(ctx.Session)
- var err error
- branch, err = dSess.GetBranch()
- if err != nil {
- return nil, nil
- }
- }
-
- var ret []sql.Statistic
- for _, qual := range statDb.ListStatQuals(branch) {
- if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) {
- stat, _ := statDb.GetStat(branch, qual)
- ret = append(ret, stat)
- }
- }
-
- return ret, nil
-}
-
-func (p *Provider) setStatDb(name string, db Database) {
- p.mu.Lock()
- defer p.mu.Unlock()
- p.statDbs[name] = db
-}
-
-func (p *Provider) getStatDb(name string) (Database, bool) {
- p.mu.Lock()
- defer p.mu.Unlock()
- statDb, ok := p.statDbs[strings.ToLower(name)]
- return statDb, ok
-}
-
-func (p *Provider) deleteStatDb(name string) {
- p.mu.Lock()
- defer p.mu.Unlock()
- delete(p.statDbs, strings.ToLower(name))
-}
-
-func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error {
- statDb, ok := p.getStatDb(s.Qualifier().Db())
- if !ok {
- return nil
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil
- }
-
- doltStat, err := DoltStatsFromSql(s)
- if err != nil {
- return err
- }
-
- p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db()))
-
- return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat)
-}
-
-func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) {
- statDb, ok := p.getStatDb(qual.Db())
- if !ok {
- return nil, false
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil, false
- }
-
- return statDb.GetStat(branch, qual)
-}
-
-func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) {
- stat, ok := p.getQualStats(ctx, qual)
- if !ok {
- return nil, false
- }
- return stat, true
-}
-
-func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return nil
- }
-
- p.mu.Lock()
- defer p.mu.Unlock()
-
- p.status[db] = "dropped"
-
- return statDb.DeleteBranchStats(ctx, branch, flush)
-}
-
-func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return nil
- }
- for _, branch := range statDb.Branches() {
- // remove provider access
- p.DropBranchDbStats(ctx, branch, db, flush)
- }
-
- if flush {
- p.deleteStatDb(db)
- }
-
- return nil
-}
-
-func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error {
- statDb, ok := p.getStatDb(qual.Db())
- if !ok {
- return nil
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return nil
- }
-
- if _, ok := statDb.GetStat(branch, qual); ok {
- statDb.DeleteStats(ctx, branch, qual)
- p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String()))
- }
-
- return nil
-}
-
-func (p *Provider) UpdateStatus(db string, msg string) {
- p.mu.Lock()
- defer p.mu.Unlock()
-
- p.status[db] = msg
-}
-
-func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return 0, sql.ErrDatabaseNotFound.New(db)
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return 0, err
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
- if !ok {
- return 0, nil
- }
-
- return priStats.RowCount(), nil
-}
-
-func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) {
- statDb, ok := p.getStatDb(db)
- if !ok {
- return 0, sql.ErrDatabaseNotFound.New(db)
- }
-
- dSess := dsess.DSessFromSess(ctx.Session)
- branch, err := dSess.GetBranch()
- if err != nil {
- return 0, err
- }
-
- var schemaName string
- if schTab, ok := table.(sql.DatabaseSchemaTable); ok {
- schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName())
- }
-
- priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary"))
- if !ok {
- return 0, nil
- }
-
- return priStats.AvgSize(), nil
-}
-
-func (p *Provider) Prune(ctx *sql.Context) error {
- dSess := dsess.DSessFromSess(ctx.Session)
-
- for _, sqlDb := range p.pro.DoltDatabases() {
- dbName := strings.ToLower(sqlDb.Name())
- sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName)
- if err != nil {
- return err
- }
- if !ok {
- continue
- }
- statDb, ok := p.getStatDb(dbName)
- if !ok {
- continue
- }
-
- // Canceling refresh thread prevents background thread from
- // making progress. Prune should succeed.
- p.CancelRefreshThread(dbName)
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- for _, branch := range statDb.Branches() {
- err := func() error {
- // function closure ensures safe defers
- var stats []sql.Statistic
- for _, t := range tables {
- // XXX: avoid races with ANALYZE with the table locks.
- // Either concurrent purge or analyze (or both) will fail.
- if !p.TryLockForUpdate(branch, dbName, t) {
- p.mu.Lock()
- fmt.Println(p.lockedTables)
- p.mu.Unlock()
- return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished")
- }
- defer p.UnlockTable(branch, dbName, t)
-
- tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t)
- if err != nil {
- return err
- }
- stats = append(stats, tableStats...)
- }
-
- if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil {
- return err
- }
-
- for _, s := range stats {
- ds, ok := s.(*DoltStats)
- if !ok {
- return fmt.Errorf("unexpected statistics type found: %T", s)
- }
- if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil {
- return err
- }
- }
- if err := statDb.Flush(ctx, branch); err != nil {
- return err
- }
- return nil
- }()
- if err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-func (p *Provider) Purge(ctx *sql.Context) error {
- for _, sqlDb := range p.pro.DoltDatabases() {
- dbName := strings.ToLower(sqlDb.Name())
-
- tables, err := sqlDb.GetTableNames(ctx)
- if err != nil {
- return err
- }
-
- var branches []string
- db, ok := p.getStatDb(dbName)
- if ok {
- // Canceling refresh thread prevents background thread from
- // making progress. Purge should succeed.
- p.CancelRefreshThread(dbName)
-
- branches = db.Branches()
- for _, branch := range branches {
- err := func() error {
- for _, t := range tables {
- // XXX: avoid races with ANALYZE with the table locks.
- // Either concurrent purge or analyze (or both) will fail.
- if !p.TryLockForUpdate(branch, dbName, t) {
- return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished")
- }
- defer p.UnlockTable(branch, dbName, t)
- }
-
- err := p.DropBranchDbStats(ctx, branch, dbName, true)
- if err != nil {
- return fmt.Errorf("failed to drop stats: %w", err)
- }
- return nil
- }()
- if err != nil {
- return err
- }
- }
- }
-
- // if the database's stats failed to load, we still want to delete the folder
-
- fs, err := p.pro.FileSystemForDatabase(dbName)
- if err != nil {
- return err
- }
-
- //remove from filesystem
- statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir)
- if err != nil {
- return err
- }
-
- if ok, _ := statsFs.Exists(""); ok {
- if err := statsFs.Delete("", true); err != nil {
- return err
- }
- }
-
- dropDbLoc, err := statsFs.Abs("")
- if err != nil {
- return err
- }
-
- if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil {
- return err
- }
- if len(branches) == 0 {
- // if stats db was invalid on startup, recreate from baseline
- branches = p.getStatsBranches(ctx)
- }
- p.Load(ctx, fs, sqlDb, branches)
- }
- return nil
-}
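Aside: the deleted Prune and Purge bodies rely on a Go idiom that is easy to miss. Each branch iteration is wrapped in an immediately invoked closure so that the deferred UnlockTable calls run when that iteration finishes, rather than accumulating until the whole function returns. A minimal, self-contained sketch of the pattern follows; the lockSet type and key scheme are toy stand-ins, not Dolt's actual API.

package main

import (
	"fmt"
	"sync"
)

// lockSet is a toy stand-in for the provider's per-table update locks.
type lockSet struct {
	mu     sync.Mutex
	locked map[string]bool
}

func (ls *lockSet) tryLock(key string) bool {
	ls.mu.Lock()
	defer ls.mu.Unlock()
	if ls.locked[key] {
		return false
	}
	ls.locked[key] = true
	return true
}

func (ls *lockSet) unlock(key string) {
	ls.mu.Lock()
	defer ls.mu.Unlock()
	delete(ls.locked, key)
}

func pruneAll(ls *lockSet, branches, tables []string) error {
	for _, branch := range branches {
		// The closure ensures every defer below fires before the next
		// branch is processed, mirroring the "function closure ensures
		// safe defers" comment in the deleted code.
		err := func() error {
			for _, t := range tables {
				key := branch + "/" + t
				if !ls.tryLock(key) {
					return fmt.Errorf("concurrent update; retry when finished")
				}
				defer ls.unlock(key)
			}
			// ... per-branch work happens here while all table locks are held ...
			return nil
		}()
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	ls := &lockSet{locked: map[string]bool{}}
	if err := pruneAll(ls, []string{"main", "feat"}, []string{"xy"}); err != nil {
		fmt.Println(err)
	}
}

Without the closure, an error on a later branch would leave every earlier branch's table locks held until the outer function returned.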
diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go
index 99e6c2f5a9b..6bccab80727 100644
--- a/go/libraries/doltcore/sqle/system_variables.go
+++ b/go/libraries/doltcore/sqle/system_variables.go
@@ -16,6 +16,7 @@ package sqle
import (
"math"
+ "time"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
@@ -219,39 +220,39 @@ var DoltSystemVariables = []sql.SystemVariable{
Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshEnabled,
+ Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
- Default: int8(0),
+ Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
+ Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsBootstrapEnabled,
+ Name: dsess.DoltStatsMemoryOnly,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
+ Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
Default: int8(0),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsMemoryOnly,
+ Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
+ Default: int64(500 * time.Millisecond / time.Millisecond),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshThreshold,
+ Name: dsess.DoltStatsBranchInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
- Default: float64(.5),
+ Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false),
+ Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshInterval,
+ Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
- Default: 600,
+ Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
+ Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
@@ -446,39 +447,39 @@ func AddDoltSystemVariables() {
Default: int8(0),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshEnabled,
+ Name: dsess.DoltStatsEnabled,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled),
- Default: int8(0),
+ Type: types.NewSystemBoolType(dsess.DoltStatsEnabled),
+ Default: int8(1),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsBootstrapEnabled,
+ Name: dsess.DoltStatsGCInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false),
+ Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsMemoryOnly,
+ Name: dsess.DoltStatsJobInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
- Default: int8(0),
+ Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false),
+ Default: int64(500 * time.Millisecond / time.Millisecond),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshThreshold,
+ Name: dsess.DoltStatsBranchInterval,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10),
- Default: float64(.5),
+ Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false),
+ Default: int64(time.Hour / time.Millisecond),
},
&sql.MysqlSystemVariable{
- Name: dsess.DoltStatsAutoRefreshInterval,
+ Name: dsess.DoltStatsMemoryOnly,
Dynamic: true,
Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global),
- Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false),
- Default: 120,
+ Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly),
+ Default: int8(0),
},
&sql.MysqlSystemVariable{
Name: dsess.DoltStatsBranches,
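Aside: the new interval defaults above encode a plain-milliseconds convention. The stored value is an int64 count of milliseconds, so int64(time.Hour / time.Millisecond) is just 3,600,000, and turning a stored count back into a time.Duration multiplies by time.Millisecond. A small standard-library-only sketch of the round trip:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Defaults from the diff, stored as int64 millisecond counts.
	defaultJobIntervalMs := int64(500 * time.Millisecond / time.Millisecond) // 500
	defaultGCIntervalMs := int64(time.Hour / time.Millisecond)               // 3600000

	// Converting a stored count back into a time.Duration for timers.
	jobInterval := time.Duration(defaultJobIntervalMs) * time.Millisecond
	gcInterval := time.Duration(defaultGCIntervalMs) * time.Millisecond

	fmt.Println(defaultJobIntervalMs, jobInterval) // 500 500ms
	fmt.Println(defaultGCIntervalMs, gcInterval)   // 3600000 1h0m0s
}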
diff --git a/go/libraries/doltcore/sqle/tables.go b/go/libraries/doltcore/sqle/tables.go
index e8fb46ea5d1..06765360bff 100644
--- a/go/libraries/doltcore/sqle/tables.go
+++ b/go/libraries/doltcore/sqle/tables.go
@@ -127,12 +127,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi
return sql.IndexLookup{}, nil, nil, false, nil
}
- dbState, ok, err := sess.LookupDbState(ctx, t.db.Name())
+ dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName())
if err != nil {
return sql.IndexLookup{}, nil, nil, false, nil
}
if !ok {
- return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name())
+ return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName())
}
var lookupCols []expression.LookupColumn
diff --git a/go/libraries/doltcore/sqle/user_space_database.go b/go/libraries/doltcore/sqle/user_space_database.go
index e54c03b7eb3..c3689e13a61 100644
--- a/go/libraries/doltcore/sqle/user_space_database.go
+++ b/go/libraries/doltcore/sqle/user_space_database.go
@@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string {
return db.Name()
}
+func (db *UserSpaceDatabase) AliasedName() string {
+ return db.Name()
+}
+
func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) {
panic(fmt.Sprintf("GetSchema is not implemented for database %T", db))
}
diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff.go b/go/libraries/utils/circular/buff.go
similarity index 90%
rename from go/libraries/doltcore/remotestorage/internal/circular/buff.go
rename to go/libraries/utils/circular/buff.go
index 2a5ba8866d1..36632a88085 100644
--- a/go/libraries/doltcore/remotestorage/internal/circular/buff.go
+++ b/go/libraries/utils/circular/buff.go
@@ -34,12 +34,20 @@ func (b *Buff[T]) Len() int {
return b.len
}
+func (b *Buff[T]) Cap() int {
+ return cap(b.arr)
+}
+
func (b *Buff[T]) At(i int) T {
+ return *b.at(i)
+}
+
+func (b *Buff[T]) at(i int) *T {
if i >= b.Len() {
panic("At on Buff too small")
}
j := (b.front + i) % len(b.arr)
- return b.arr[j]
+ return &b.arr[j]
}
func (b *Buff[T]) Front() T {
@@ -50,6 +58,9 @@ func (b *Buff[T]) Pop() {
if b.Len() == 0 {
panic("Pop empty Buff")
}
+ // Don't leak entries...
+ var empty T
+ *b.at(0) = empty
b.front = (b.front + 1) % len(b.arr)
b.len -= 1
}
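Aside: the new zeroing in Pop matters whenever T contains pointers; without it the vacated slot keeps its old value reachable through the backing array and blocks garbage collection. Below is a self-contained toy ring showing the same zero-on-pop pattern; it deliberately avoids the real package's constructor, which this diff does not show.

package main

import "fmt"

type ring[T any] struct {
	arr   []T
	front int
	len   int
}

func (r *ring[T]) push(v T) {
	if r.len == len(r.arr) {
		panic("ring full")
	}
	r.arr[(r.front+r.len)%len(r.arr)] = v
	r.len++
}

func (r *ring[T]) pop() T {
	if r.len == 0 {
		panic("pop on empty ring")
	}
	v := r.arr[r.front]
	var empty T
	r.arr[r.front] = empty // don't leak entries: drop the stale reference
	r.front = (r.front + 1) % len(r.arr)
	r.len--
	return v
}

func main() {
	r := &ring[*[]byte]{arr: make([]*[]byte, 4)}
	buf := make([]byte, 1<<20)
	r.push(&buf)
	fmt.Println(r.pop() != nil) // true; the backing slot itself is now nil
}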
diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff_test.go b/go/libraries/utils/circular/buff_test.go
similarity index 100%
rename from go/libraries/doltcore/remotestorage/internal/circular/buff_test.go
rename to go/libraries/utils/circular/buff_test.go
diff --git a/go/performance/scripts/dg_sysbench.sh b/go/performance/scripts/dg_sysbench.sh
new file mode 100755
index 00000000000..0ce8ca1927a
--- /dev/null
+++ b/go/performance/scripts/dg_sysbench.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+SYSBENCH_TEST="oltp_insert_only"
+WORKING_DIR=`mktemp -d`
+PPROF=0
+PORT=5433
+
+# parse options
+# superuser.com/questions/186272/
+while test $# -gt 0
+do
+ case "$1" in
+
+ --new-new) export DOLT_DEFAULT_BIN_FORMAT="__DOLT__" &&
+ export ENABLE_ROW_ITER_2=true
+ ;;
+
+ --no-exchange) export SINGLE_THREAD_FEATURE_FLAG=true
+ ;;
+
+ # benchmark with pprof profiling
+ --pprof) PPROF=1
+ ;;
+
+ # run dolt single threaded
+ --single) export GOMAXPROCS=1
+ ;;
+
+ --row2) export ENABLE_ROW_ITER_2=true
+ ;;
+
+ --journal) export DOLT_ENABLE_CHUNK_JOURNAL=true
+ ;;
+
+ # specify sysbench benchmark
+ *) SYSBENCH_TEST="$1"
+ ;;
+
+ esac
+ shift
+done
+
+if [ ! -d "./sysbench-lua-scripts" ]; then
+ git clone https://github.com/dolthub/sysbench-lua-scripts.git
+fi
+
+# collect custom sysbench scripts
+cp ./sysbench-lua-scripts/*.lua "$WORKING_DIR"
+cd "$WORKING_DIR"
+
+# make a sql-server config file
+cat <<YAML > dolt-config.yaml
+log_level: "info"
+
+behavior:
+ read_only: false
+
+user:
+ name: "user"
+ password: "pass"
+
+listener:
+ host: "0.0.0.0"
+ port: $PORT
+ read_timeout_millis: 28800000
+ write_timeout_millis: 28800000
+
+data_dir: .
+YAML
+
+# start a server
+mkdir sbtest
+cd sbtest
+doltgres -config="../dolt-config.yaml" 2> prepare.log &
+SERVER_PID="$!"
+
+set -x
+
+sleep 1
+
+ps aux | grep "doltgres"
+lsof -iTCP -sTCP:LISTEN
+echo $SERVER_PID
+psql --port $PORT --host=0.0.0.0 --dbname=doltgres -c "create database sbtest"
+
+
+# stop it if it crashes
+cleanup() {
+ kill -15 "$SERVER_PID"
+}
+trap cleanup EXIT
+
+# setup benchmark
+echo "benchmark $SYSBENCH_TEST bootstrapping at $WORKING_DIR"
+
+
+sysbench \
+ --db-driver="pgsql" \
+ --pgsql-host="0.0.0.0" \
+ --pgsql-port="$PORT" \
+ --pgsql-user="user" \
+ --pgsql-password="pass" \
+ "$SYSBENCH_TEST" prepare
+
+# restart server to isolate bench run
+kill -15 "$SERVER_PID"
+
+# maybe run with pprof
+if [ "$PPROF" -eq 1 ]; then
+ doltgres --prof cpu -config="../dolt-config.yaml" 2> run.log &
+else
+ doltgres -config="../dolt-config.yaml" 2> run.log &
+fi
+SERVER_PID="$!"
+sleep 1
+
+
+# run benchmark
+echo "benchmark $SYSBENCH_TEST starting at $WORKING_DIR"
+
+sysbench \
+ --db-driver="pgsql" \
+ --pgsql-host="0.0.0.0" \
+ --pgsql-port="$PORT" \
+ --pgsql-user="user" \
+ --pgsql-password="pass" \
+ --db-ps-mode=disable \
+ --time=30 \
+ "$SYSBENCH_TEST" run
+
+unset DOLT_ENABLE_CHUNK_JOURNAL
+unset DOLT_DEFAULT_BIN_FORMAT
+unset ENABLE_ROW_ITER_2
+unset SINGLE_THREAD_FEATURE_FLAG
+unset GOMAXPROCS
+
+echo "benchmark $SYSBENCH_TEST complete at $WORKING_DIR"
+if [ "$PPROF" -eq 1 ]; then
+ # parse run.log to output the profile location
+ head -n1 "$WORKING_DIR/run.log" | cut -d ":" -f 4
+fi
+echo ""
diff --git a/go/performance/utils/benchmark_runner/sysbench.go b/go/performance/utils/benchmark_runner/sysbench.go
index 5953368b5b2..02e637b4920 100644
--- a/go/performance/utils/benchmark_runner/sysbench.go
+++ b/go/performance/utils/benchmark_runner/sysbench.go
@@ -21,9 +21,6 @@ import (
"os/exec"
"path/filepath"
"strings"
- "time"
-
- "github.com/jmoiron/sqlx"
"github.com/google/uuid"
)
@@ -149,10 +146,6 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
return nil, err
}
- if err := t.collectStats(ctx); err != nil {
- return nil, err
- }
-
fmt.Println("Running test", t.test.GetName())
rs, err := t.run(ctx)
@@ -162,76 +155,3 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) {
return rs, nil
}
-
-func (t *sysbenchTesterImpl) collectStats(ctx context.Context) error {
- if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
- db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/test", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
- if err != nil {
- return err
- }
- return collectStats(ctx, db)
- }
- return nil
-}
-
-func collectStats(ctx context.Context, db *sqlx.DB) error {
- c, err := db.Connx(ctx)
- if err != nil {
- return err
- }
-
- {
- // configuration, restart, and check needs to be in the same session
- tx, err := c.BeginTxx(ctx, nil)
- if err != nil {
- return err
- }
-
- if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
- return err
- }
- if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_interval = 0;"); err != nil {
- return err
- }
- if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"); err != nil {
- return err
- }
- if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"); err != nil {
- return err
- }
- if _, err := tx.Exec("call dolt_stats_restart();"); err != nil {
- return err
- }
-
- rows := map[string]interface{}{"cnt": 0}
- tick := time.NewTicker(5 * time.Second)
- for {
- if rows["cnt"] != 0 {
- fmt.Printf("collected %d histogram buckets\n", rows["cnt"])
- break
- }
- select {
- case <-tick.C:
- res, err := tx.Queryx("select count(*) as cnt from dolt_statistics;")
- if err != nil {
- return err
- }
- if !res.Next() {
- return fmt.Errorf("failed to set statistics")
- }
- if err := res.MapScan(rows); err != nil {
- return err
- }
- if err := res.Close(); err != nil {
- return err
- }
- }
- }
- }
-
- if _, err := c.QueryContext(ctx, "call dolt_stats_stop();"); err != nil {
- return err
- }
-
- return nil
-}
diff --git a/go/performance/utils/benchmark_runner/tpcc.go b/go/performance/utils/benchmark_runner/tpcc.go
index 4c7f01a2444..be265e6b568 100644
--- a/go/performance/utils/benchmark_runner/tpcc.go
+++ b/go/performance/utils/benchmark_runner/tpcc.go
@@ -20,9 +20,6 @@ import (
"os"
"os/exec"
"path/filepath"
- "strings"
-
- "github.com/jmoiron/sqlx"
)
type tpccTesterImpl struct {
@@ -54,17 +51,6 @@ func (t *tpccTesterImpl) outputToResult(output []byte) (*Result, error) {
return OutputToResult(output, t.serverConfig.GetServerType(), t.serverConfig.GetVersion(), t.test.GetName(), t.test.GetId(), t.suiteId, t.config.GetRuntimeOs(), t.config.GetRuntimeGoArch(), t.serverParams, t.test.GetParamsToSlice(), nil, false)
}
-func (t *tpccTesterImpl) collectStats(ctx context.Context) error {
- if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") {
- db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/sbt", t.serverConfig.GetHost(), t.serverConfig.GetPort()))
- if err != nil {
- return err
- }
- return collectStats(ctx, db)
- }
- return nil
-}
-
func (t *tpccTesterImpl) prepare(ctx context.Context) error {
args := t.test.GetPrepareArgs(t.serverConfig)
cmd := exec.CommandContext(ctx, t.tpccCommand, args...)
@@ -119,10 +105,6 @@ func (t *tpccTesterImpl) Test(ctx context.Context) (*Result, error) {
return nil, err
}
- if err := t.collectStats(ctx); err != nil {
- return nil, err
- }
-
fmt.Println("Running test", t.test.GetName())
rs, err := t.run(ctx)
diff --git a/go/store/prolly/tree/mutator.go b/go/store/prolly/tree/mutator.go
index e6474e16cbf..a03d042a4a0 100644
--- a/go/store/prolly/tree/mutator.go
+++ b/go/store/prolly/tree/mutator.go
@@ -17,6 +17,7 @@ package tree
import (
"bytes"
"context"
+ "fmt"
"github.com/dolthub/dolt/go/store/prolly/message"
)
@@ -132,7 +133,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer](
prev := newKey
newKey, newValue = edits.NextMutation(ctx)
if newKey != nil {
- assertTrue(order.Compare(K(newKey), K(prev)) > 0, "expected sorted edits")
+ assertTrue(order.Compare(K(newKey), K(prev)) > 0, "expected sorted edits"+fmt.Sprintf("%v, %v", prev, newKey))
}
}
diff --git a/go/store/prolly/tree/stats.go b/go/store/prolly/tree/stats.go
index 1573d01893d..9611f3b583d 100644
--- a/go/store/prolly/tree/stats.go
+++ b/go/store/prolly/tree/stats.go
@@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static
// GetHistogramLevel returns the highest internal level of the tree that has
// more than |low| addresses.
func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) {
+ if cnt, err := m.Count(); err != nil {
+ return nil, err
+ } else if cnt == 0 {
+ return nil, nil
+ }
currentLevel := []Node{m.Root}
level := m.Root.Level()
for len(currentLevel) < low && level > 0 {
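Aside: GetHistogramLevel walks down from the root one level at a time until the current level holds at least |low| nodes, and the guard added above simply short-circuits the empty-map case before that walk starts. A toy sketch of the loop, with a mock node type standing in for the real prolly tree:

package main

import "fmt"

type node struct {
	level    int
	children []*node
}

func histogramLevel(root *node, low int) []*node {
	if root == nil {
		return nil // mirrors the new empty-map guard
	}
	current := []*node{root}
	level := root.level
	for len(current) < low && level > 0 {
		var next []*node
		for _, n := range current {
			next = append(next, n.children...)
		}
		current = next
		level--
	}
	return current
}

func main() {
	leafA, leafB := &node{level: 0}, &node{level: 0}
	root := &node{level: 1, children: []*node{leafA, leafB}}
	fmt.Println(len(histogramLevel(root, 2))) // 2
}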
diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go
index f92bc8ce1cb..9b3a50ea139 100644
--- a/go/store/val/tuple_builder.go
+++ b/go/store/val/tuple_builder.go
@@ -15,6 +15,8 @@
package val
import (
+ "log"
+ "strconv"
"time"
"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
@@ -77,7 +79,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder {
func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) {
for i, typ := range tb.Desc.Types {
if !typ.Nullable && tb.fields[i] == nil {
- panic("cannot write NULL to non-NULL field")
+ log.Println("cannot write NULL to non-NULL field: " + strconv.Itoa(i) + " " + string(tb.fields[i]))
}
}
return tb.BuildPermissive(pool)
diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go
index bd55519ab35..188c1f98829 100644
--- a/go/store/val/tuple_descriptor.go
+++ b/go/store/val/tuple_descriptor.go
@@ -639,7 +639,7 @@ func (td TupleDesc) formatValue(enc Encoding, i int, value []byte) string {
case StringAddrEnc:
return hex.EncodeToString(value)
case CommitAddrEnc:
- return hex.EncodeToString(value)
+ return hash.New(value).String()[:5]
case CellEnc:
return hex.EncodeToString(value)
case ExtendedEnc:
diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats
index 7cc4c4bf9f2..03ac1eefbcf 100644
--- a/integration-tests/bats/stats.bats
+++ b/integration-tests/bats/stats.bats
@@ -22,6 +22,7 @@ SQL
cd $TMPDIRS/repo2
dolt init
+ dolt sql -q "SET @@PERSIST.dolt_stats_job_interval = 100"
 dolt sql <<SQL

-@test "stats: bootstrap abort over 1mm rows" {
- cat <<EOF > data.py
-import random
-import os
+## bats test_tags=no_lambda
+#@test "stats: bootstrap abort over 1mm rows" {
+ #cat <<EOF > data.py
+#import random
+#import os
-rows = 2*1000*1000+1
+#rows = 2*1000*1000+1
-def main():
- f = open("data.csv","w+")
- f.write("id,hostname\n")
+#def main():
+ #f = open("data.csv","w+")
+ #f.write("id,hostname\n")
- for i in range(rows):
- hostname = random.getrandbits(100)
- f.write(f"{i},{hostname}\n")
- if i % (500*1000) == 0:
- print("row :", i)
- f.flush()
+ #for i in range(rows):
+ #hostname = random.getrandbits(100)
+ #f.write(f"{i},{hostname}\n")
+ #if i % (500*1000) == 0:
+ #print("row :", i)
+ #f.flush()
- f.close()
+ #f.close()
-if __name__ == "__main__":
- main()
-EOF
+#if __name__ == "__main__":
+ #main()
+#EOF
- mkdir repo3
- cd repo3
- python3 ../data.py
+ #mkdir repo3
+ #cd repo3
+ #python3 ../data.py
- dolt init
- dolt sql -q "create table f (id int primary key, hostname int)"
- dolt table import -u --continue f data.csv
+ #dolt init
+ #dolt sql -q "create table f (id int primary key, hostname int)"
+ #dolt table import -u --continue f data.csv
- dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;"
+ #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;"
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false
- [ "${lines[2]}" = "0" ]
-}
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false
+ #[ "${lines[2]}" = "0" ]
+#}
-@test "stats: stats delete index schema change" {
- cd repo2
+#@test "stats: stats delete index schema change" {
+ #cd repo2
- dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
- dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
+ #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
+ #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
- dolt sql -q "insert into xy values (0,0), (1,1)"
- dolt sql -q "analyze table xy"
+ #dolt sql -q "insert into xy values (0,0), (1,1)"
+ #dolt sql -q "analyze table xy"
- # stats OK after analyze
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "2" ]
+ ## stats OK after analyze
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "2" ]
- dolt sql -q "alter table xy drop index y"
+ #dolt sql -q "alter table xy drop index y"
- # load after schema change should purge
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "0" ]
+ ## load after schema change should purge
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "0" ]
- dolt sql -q "analyze table xy"
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "1" ]
-}
+ #dolt sql -q "analyze table xy"
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "1" ]
+#}
-@test "stats: stats recreate table without index" {
- cd repo2
+#@test "stats: stats recreate table without index" {
+ #cd repo2
- dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
- dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
+ #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;"
+ #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;"
- dolt sql -q "insert into xy values (0,0), (1,1)"
- dolt sql -q "analyze table xy"
+ #dolt sql -q "insert into xy values (0,0), (1,1)"
+ #dolt sql -q "analyze table xy"
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "2" ]
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "2" ]
- dolt sql -q "drop table xy"
- dolt sql -q "create table xy (x int primary key, y int)"
- dolt sql -q "insert into xy values (0,0), (1,1)"
+ #dolt sql -q "drop table xy"
+ #dolt sql -q "create table xy (x int primary key, y int)"
+ #dolt sql -q "insert into xy values (0,0), (1,1)"
- # make sure no stats
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "0" ]
+ ## make sure no stats
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "0" ]
- dolt sql -q "analyze table xy"
+ #dolt sql -q "analyze table xy"
- run dolt sql -r csv -q "select count(*) from dolt_statistics"
- [ "$status" -eq 0 ]
- [ "${lines[1]}" = "1" ]
+ #run dolt sql -r csv -q "select count(*) from dolt_statistics"
+ #[ "$status" -eq 0 ]
+ #[ "${lines[1]}" = "1" ]
- stop_sql_server
-}
+ #stop_sql_server
+#}