diff --git a/go/cmd/dolt/commands/engine/sqlengine.go b/go/cmd/dolt/commands/engine/sqlengine.go index d7f9ee10fcf..85c3772eec7 100644 --- a/go/cmd/dolt/commands/engine/sqlengine.go +++ b/go/cmd/dolt/commands/engine/sqlengine.go @@ -16,10 +16,10 @@ package engine import ( "context" - "fmt" "os" "strconv" "strings" + "time" gms "github.com/dolthub/go-mysql-server" "github.com/dolthub/go-mysql-server/eventscheduler" @@ -43,7 +43,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/mysql_file_handler" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/utils/config" @@ -189,7 +188,13 @@ func NewSqlEngine( "authentication_dolt_jwt": NewAuthenticateDoltJWTPlugin(config.JwksConfig), }) - statsPro := statspro.NewProvider(pro, statsnoms.NewNomsStatsFactory(mrEnv.RemoteDialProvider())) + var statsPro sql.StatsProvider + _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsEnabled) + if enabled.(int8) == 1 { + statsPro = statspro.NewStatsCoord(ctx, pro, sqlEngine.NewDefaultContext, logrus.StandardLogger(), bThreads, mrEnv.GetEnv(mrEnv.GetFirstDatabase())) + } else { + statsPro = statspro.StatsNoop{} + } engine.Analyzer.Catalog.StatsProvider = statsPro engine.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{}) @@ -206,8 +211,28 @@ func NewSqlEngine( // configuring stats depends on sessionBuilder // sessionBuilder needs ref to statsProv - if err = statsPro.Configure(ctx, sqlEngine.NewDefaultContext, bThreads, dbs); err != nil { - fmt.Fprintln(cli.CliErr, err) + if sc, ok := statsPro.(*statspro.StatsCoord); ok { + //sc.Debug = true + _, memOnly, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly) + sc.SetMemOnly(memOnly.(int8) == 1) + + typ, jobI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsJobInterval) + _, gcI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsGCInterval) + _, brI, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranchInterval) + + jobInterval, _, _ := typ.GetType().Convert(jobI) + gcInterval, _, _ := typ.GetType().Convert(gcI) + brInterval, _, _ := typ.GetType().Convert(brI) + + sc.SetTimers( + jobInterval.(int64)*int64(time.Millisecond), + gcInterval.(int64)*int64(time.Millisecond), + brInterval.(int64)*int64(time.Millisecond)) + + err := sc.Init(ctx, dbs, false) + if err != nil { + return nil, err + } } // Load MySQL Db information diff --git a/go/cmd/dolt/commands/sqlserver/server.go b/go/cmd/dolt/commands/sqlserver/server.go index 33d253a377a..3ae8cb70e45 100644 --- a/go/cmd/dolt/commands/sqlserver/server.go +++ b/go/cmd/dolt/commands/sqlserver/server.go @@ -55,6 +55,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/cluster" _ "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dfunctions" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/sqlserver" "github.com/dolthub/dolt/go/libraries/events" "github.com/dolthub/dolt/go/libraries/utils/config" @@ -260,23 +261,23 @@ func ConfigureServices( var sqlEngine *engine.SqlEngine InitSqlEngine := &svcs.AnonService{ InitF: func(ctx context.Context) (err error) { - if statsOn, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsAutoRefreshEnabled); err != nil { - // Auto-stats is off by default for every command except - // sql-server. Unless the config specifies a specific - // behavior, enable server stats collection. - sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, 1) - } else if statsOn != "0" { - // do not bootstrap if auto-stats enabled - } else if _, err := mrEnv.Config().GetString(env.SqlServerGlobalsPrefix + "." + dsess.DoltStatsBootstrapEnabled); err != nil { - // If we've disabled stats collection and config does not - // specify bootstrap behavior, enable bootstrapping. - sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 1) - } sqlEngine, err = engine.NewSqlEngine( ctx, mrEnv, config, ) + if sc, ok := sqlEngine.GetUnderlyingEngine().Analyzer.Catalog.StatsProvider.(*statspro.StatsCoord); ok { + sqlCtx, err := sqlEngine.NewDefaultContext(ctx) + if err != nil { + return err + } + if sc == nil { + return fmt.Errorf("unexpected nil stats coord") + } + if err = sc.Restart(sqlCtx); err != nil { + return err + } + } return err }, StopF: func() error { diff --git a/go/go.mod b/go/go.mod index c9f89bfbc1d..35147a1df1d 100644 --- a/go/go.mod +++ b/go/go.mod @@ -56,7 +56,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc + github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/esote/minmaxheap v1.0.0 @@ -91,7 +91,6 @@ require ( golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 golang.org/x/text v0.21.0 gonum.org/v1/plot v0.11.0 - gopkg.in/errgo.v2 v2.1.0 gopkg.in/go-jose/go-jose.v2 v2.6.3 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go/go.sum b/go/go.sum index 5038e3a6cf6..8dbf9a92389 100644 --- a/go/go.sum +++ b/go/go.sum @@ -179,8 +179,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90 h1:Sni8jrP0sy/w9ZYXoff4g/ixe+7bFCZlfCqXKJSU+zM= github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA= -github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc h1:SdN7GRPtaqmLwfi6cVcyF4Oc8FbFUJ+mwsFRV++6iH4= -github.com/dolthub/go-mysql-server v0.19.1-0.20250207201905-b3a4c87c4fdc/go.mod h1:QQxZvPHOtycbC2bVmqmT6/Fov2g1/T1Rtm76wLd/Y1E= +github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef h1:vQ5zStRSgdem9R3BtUhkVa5Q8DhSrYs9ReRVFIq86so= +github.com/dolthub/go-mysql-server v0.19.1-0.20250210190204-a73f126157ef/go.mod h1:QQxZvPHOtycbC2bVmqmT6/Fov2g1/T1Rtm76wLd/Y1E= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= @@ -1153,7 +1153,6 @@ gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= diff --git a/go/go.work.sum b/go/go.work.sum index 71f195420ad..37de10bbf10 100644 --- a/go/go.work.sum +++ b/go/go.work.sum @@ -404,8 +404,6 @@ github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byA github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= -github.com/esote/minmaxheap v1.0.0 h1:rgA7StnXXpZG6qlM0S7pUmEv1KpWe32rYT4x8J8ntaA= -github.com/esote/minmaxheap v1.0.0/go.mod h1:Ln8+i7fS1k3PLgZI2JAo0iA1as95QnIYiGCrqSJ5FZk= github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk= github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db h1:gb2Z18BhTPJPpLQWj4T+rfKHYCHxRHCtRxhKKjRidVw= @@ -732,6 +730,7 @@ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/cheggaaa/pb.v1 v1.0.25 h1:Ev7yu1/f6+d+b3pi5vPdRPc6nNtP1umSfcWiEfRqv6I= +gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index e27a397915c..699f3ec0734 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -2052,7 +2052,7 @@ func (ddb *DoltDB) AddStash(ctx context.Context, head *Commit, stash RootValue, return err } -func (ddb *DoltDB) SetStatisics(ctx context.Context, branch string, addr hash.Hash) error { +func (ddb *DoltDB) SetStatistics(ctx context.Context, branch string, addr hash.Hash) error { statsDs, err := ddb.db.GetDataset(ctx, ref.NewStatsRef(branch).String()) if err != nil { return err diff --git a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go index 8beeb5ea61a..c975e7e52f9 100644 --- a/go/libraries/doltcore/remotestorage/internal/reliable/chan.go +++ b/go/libraries/doltcore/remotestorage/internal/reliable/chan.go @@ -15,7 +15,7 @@ package reliable import ( - "github.com/dolthub/dolt/go/libraries/doltcore/remotestorage/internal/circular" + "github.com/dolthub/dolt/go/libraries/utils/circular" ) // A reliable.Chan is a type of channel transformer which can be used to build diff --git a/go/libraries/doltcore/schema/statistic.go b/go/libraries/doltcore/schema/statistic.go index 1879951e10b..88215a7443a 100644 --- a/go/libraries/doltcore/schema/statistic.go +++ b/go/libraries/doltcore/schema/statistic.go @@ -24,12 +24,12 @@ import ( const StatsVersion int64 = 1 const ( - StatsQualifierColName = "qualifier" StatsDbColName = "database_name" StatsTableColName = "table_name" StatsIndexColName = "index_name" - StatsPositionColName = "position" + StatsBranchName = "branch" StatsCommitHashColName = "commit_hash" + StatsPrefixLenName = "prefix_len" StatsRowCountColName = "row_count" StatsDistinctCountColName = "distinct_count" StatsNullCountColName = "null_count" @@ -42,7 +42,7 @@ const ( StatsMcv2ColName = "mcv2" StatsMcv3ColName = "mcv3" StatsMcv4ColName = "mcv4" - StatsMcvCountsColName = "mcvCounts" + StatsMcvCountsColName = "mcv_counts" StatsVersionColName = "version" ) @@ -52,6 +52,7 @@ const ( StatsIndexTag StatsPositionTag StatsVersionTag + StatsPrefixLenTag StatsCommitHashTag StatsRowCountTag StatsDistinctCountTag @@ -71,9 +72,9 @@ const ( func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema { return sql.PrimaryKeySchema{ Schema: sql.Schema{ - &sql.Column{Name: StatsDbColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, - &sql.Column{Name: StatsTableColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, - &sql.Column{Name: StatsIndexColName, Type: types.Text, PrimaryKey: true, DatabaseSource: dbName}, + &sql.Column{Name: StatsDbColName, Type: types.Text, DatabaseSource: dbName}, + &sql.Column{Name: StatsTableColName, Type: types.Text, DatabaseSource: dbName}, + &sql.Column{Name: StatsIndexColName, Type: types.Text, DatabaseSource: dbName}, &sql.Column{Name: StatsRowCountColName, Type: types.Int64, DatabaseSource: dbName}, &sql.Column{Name: StatsDistinctCountColName, Type: types.Int64, DatabaseSource: dbName}, &sql.Column{Name: StatsNullCountColName, Type: types.Int64, DatabaseSource: dbName}, @@ -88,7 +89,6 @@ func StatsTableSqlSchema(dbName string) sql.PrimaryKeySchema { &sql.Column{Name: StatsMcv4ColName, Type: types.Text, DatabaseSource: dbName}, &sql.Column{Name: StatsMcvCountsColName, Type: types.Text, DatabaseSource: dbName}, }, - PkOrdinals: []int{0, 1}, } } @@ -96,20 +96,14 @@ var StatsTableDoltSchema = StatsTableDoltSchemaGen() func StatsTableDoltSchemaGen() Schema { colColl := NewColCollection( - NewColumn(StatsDbColName, StatsDbTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsTableColName, StatsTableTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsIndexColName, StatsIndexTag, stypes.StringKind, true, NotNullConstraint{}), - NewColumn(StatsPositionColName, StatsPositionTag, stypes.IntKind, true, NotNullConstraint{}), + NewColumn(StatsPrefixLenName, StatsPrefixLenTag, stypes.IntKind, true, NotNullConstraint{}), + NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, true, NotNullConstraint{}), NewColumn(StatsVersionColName, StatsVersionTag, stypes.IntKind, false, NotNullConstraint{}), - NewColumn(StatsCommitHashColName, StatsCommitHashTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsRowCountColName, StatsRowCountTag, stypes.IntKind, false, NotNullConstraint{}), NewColumn(StatsDistinctCountColName, StatsDistinctCountTag, stypes.IntKind, false, NotNullConstraint{}), NewColumn(StatsNullCountColName, StatsNullCountTag, stypes.IntKind, false, NotNullConstraint{}), - NewColumn(StatsColumnsColName, StatsColumnsTag, stypes.StringKind, false, NotNullConstraint{}), - NewColumn(StatsTypesColName, StatsTypesTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsUpperBoundColName, StatsUpperBoundTag, stypes.StringKind, false, NotNullConstraint{}), NewColumn(StatsUpperBoundCntColName, StatsUpperBoundCntTag, stypes.IntKind, false, NotNullConstraint{}), - NewColumn(StatsCreatedAtColName, StatsCreatedAtTag, stypes.TimestampKind, false, NotNullConstraint{}), NewColumn(StatsMcv1ColName, StatsMcv1Tag, stypes.StringKind, false), NewColumn(StatsMcv2ColName, StatsMcv2Tag, stypes.StringKind, false), NewColumn(StatsMcv3ColName, StatsMcv3Tag, stypes.StringKind, false), diff --git a/go/libraries/doltcore/sqle/clusterdb/database.go b/go/libraries/doltcore/sqle/clusterdb/database.go index dd741a9a205..4577d2f3c4d 100644 --- a/go/libraries/doltcore/sqle/clusterdb/database.go +++ b/go/libraries/doltcore/sqle/clusterdb/database.go @@ -162,6 +162,10 @@ func (db database) RequestedName() string { return db.Name() } +func (db database) AliasedName() string { + return db.Name() +} + type noopRepoStateWriter struct{} var _ env.RepoStateWriter = noopRepoStateWriter{} diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go index f75e5f52997..10c5e154999 100644 --- a/go/libraries/doltcore/sqle/database.go +++ b/go/libraries/doltcore/sqle/database.go @@ -694,6 +694,9 @@ func (db Database) getTableInsensitive(ctx *sql.Context, head *doltdb.Commit, ds if err != nil { return nil, false, err } + if branch == "" { + branch = db.Revision() + } dt, found = dtables.NewStatisticsTable(ctx, db.Name(), db.schemaName, branch, tables), true case doltdb.ProceduresTableName: found = true diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 37c4affbb05..9e22ef72aeb 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -970,7 +970,7 @@ func (p *DoltDatabaseProvider) databaseForRevision(ctx *sql.Context, revisionQua } } - db, err := revisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName) + db, err := RevisionDbForBranch(ctx, srcDb, resolvedRevSpec, requestedName) // preserve original user case in the case of not found if sql.ErrDatabaseNotFound.Is(err) { return nil, false, sql.ErrDatabaseNotFound.New(revisionQualifiedName) @@ -1511,8 +1511,8 @@ func isTag(ctx context.Context, db dsess.SqlDatabase, tagName string) (string, b return "", false, nil } -// revisionDbForBranch returns a new database that is tied to the branch named by revSpec -func revisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) { +// RevisionDbForBranch returns a new database that is tied to the branch named by revSpec +func RevisionDbForBranch(ctx context.Context, srcDb dsess.SqlDatabase, revSpec string, requestedName string) (dsess.SqlDatabase, error) { static := staticRepoState{ branch: ref.NewBranchRef(revSpec), RepoStateWriter: srcDb.DbData().Rsw, diff --git a/go/libraries/doltcore/sqle/dprocedures/init.go b/go/libraries/doltcore/sqle/dprocedures/init.go index 499d4209886..5a00fcb39c2 100644 --- a/go/libraries/doltcore/sqle/dprocedures/init.go +++ b/go/libraries/doltcore/sqle/dprocedures/init.go @@ -47,12 +47,13 @@ var DoltProcedures = []sql.ExternalStoredProcedureDetails{ {Name: "dolt_tag", Schema: int64Schema("status"), Function: doltTag}, {Name: "dolt_verify_constraints", Schema: int64Schema("violations"), Function: doltVerifyConstraints}, - {Name: "dolt_stats_drop", Schema: statsFuncSchema, Function: statsFunc(statsDrop)}, {Name: "dolt_stats_restart", Schema: statsFuncSchema, Function: statsFunc(statsRestart)}, {Name: "dolt_stats_stop", Schema: statsFuncSchema, Function: statsFunc(statsStop)}, - {Name: "dolt_stats_status", Schema: statsFuncSchema, Function: statsFunc(statsStatus)}, - {Name: "dolt_stats_prune", Schema: statsFuncSchema, Function: statsFunc(statsPrune)}, + {Name: "dolt_stats_info", Schema: statsFuncSchema, Function: statsFunc(statsInfo)}, {Name: "dolt_stats_purge", Schema: statsFuncSchema, Function: statsFunc(statsPurge)}, + {Name: "dolt_stats_wait", Schema: statsFuncSchema, Function: statsFunc(statsWait)}, + {Name: "dolt_stats_gc", Schema: statsFuncSchema, Function: statsFunc(statsGc)}, + {Name: "dolt_stats_timers", Schema: statsFuncSchema, Function: statsFunc(statsTimers)}, } // stringSchema returns a non-nullable schema with all columns as LONGTEXT. diff --git a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go index 139bec5e5d2..f8cc95850d2 100644 --- a/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go +++ b/go/libraries/doltcore/sqle/dprocedures/stats_funcs.go @@ -15,14 +15,14 @@ package dprocedures import ( + "context" + "encoding/json" "fmt" - "strings" + "strconv" "github.com/dolthub/go-mysql-server/sql" gmstypes "github.com/dolthub/go-mysql-server/sql/types" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/ref" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" ) @@ -34,9 +34,16 @@ var statsFuncSchema = []*sql.Column{ }, } -func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) { - return func(ctx *sql.Context, args ...string) (sql.RowIter, error) { - res, err := fn(ctx) +const OkResult = "Ok" + +func statsFunc(fn func(ctx *sql.Context, args ...string) (interface{}, error)) func(ctx *sql.Context, args ...string) (sql.RowIter, error) { + return func(ctx *sql.Context, args ...string) (iter sql.RowIter, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("stats function unexpectedly panicked: %s", r) + } + }() + res, err := fn(ctx, args...) if err != nil { return nil, err } @@ -44,124 +51,168 @@ func statsFunc(fn func(ctx *sql.Context) (interface{}, error)) func(ctx *sql.Con } } -// AutoRefreshStatsProvider is a sql.StatsProvider that exposes hooks for +type StatsInfo struct { + DbCnt int `json:"dbCnt"` + ReadCnt int `json:"readCnt"` + Active bool `json:"active"` + DbSeedCnt int `json:"dbSeedCnt"` + StorageBucketCnt int `json:"storageBucketCnt"` + CachedBucketCnt int `json:"cachedBucketCnt"` + CachedBoundCnt int `json:"cachedBoundCnt"` + CachedTemplateCnt int `json:"cachedTemplateCnt"` + StatCnt int `json:"statCnt"` + GcCounter int `json:"gcCounter"` + SyncCounter int `json:"syncCounter"` +} + +func (si StatsInfo) ToJson() string { + jsonData, err := json.Marshal(si) + if err != nil { + return "" + } + return string(jsonData) +} + +// ToggableStats is a sql.StatsProvider that exposes hooks for // observing and manipulating background database auto refresh threads. -type AutoRefreshStatsProvider interface { +type ToggableStats interface { sql.StatsProvider - CancelRefreshThread(string) - StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error - ThreadStatus(string) string - Prune(ctx *sql.Context) error + //FlushQueue(ctx context.Context) error + Restart(context.Context) error + Stop(context.Context) error + Info(ctx context.Context) (StatsInfo, error) Purge(ctx *sql.Context) error + WaitForDbSync(ctx *sql.Context) error + Gc(ctx *sql.Context) error + //ValidateState(ctx context.Context) error + //Init(context.Context, []dsess.SqlDatabase, bool) error + SetTimers(int64, int64, int64) } type BranchStatsProvider interface { DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error } -// statsRestart tries to stop and then start a refresh thread -func statsRestart(ctx *sql.Context) (interface{}, error) { +// statsRestart flushes the current job queue and re-inits all +// statistic databases. +func statsRestart(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) statsPro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) - if afp, ok := statsPro.(AutoRefreshStatsProvider); ok { - pro := dSess.Provider() - newFs, err := pro.FileSystemForDatabase(dbName) - if err != nil { - return nil, fmt.Errorf("failed to restart stats collection: %w", err) + if afp, ok := statsPro.(ToggableStats); ok { + if err := afp.Restart(ctx); err != nil { + return nil, err } - dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, newFs, pro.DbFactoryUrl(), "TODO") + return OkResult, nil + } + return nil, fmt.Errorf("provider does not implement ToggableStats") +} - sqlDb, ok := pro.BaseDatabase(ctx, dbName) - if !ok { - return nil, fmt.Errorf("failed to restart stats collection: database not found: %s", dbName) +// statsInfo returns the last update for a stats thread +func statsInfo(ctx *sql.Context, _ ...string) (interface{}, error) { + dSess := dsess.DSessFromSess(ctx.Session) + pro := dSess.StatsProvider() + if afp, ok := pro.(ToggableStats); ok { + info, err := afp.Info(ctx) + if err != nil { + return nil, err } + return info.ToJson(), nil + } + return nil, fmt.Errorf("provider does not implement ToggableStats") +} - afp.CancelRefreshThread(dbName) - - err = afp.StartRefreshThread(ctx, pro, dbName, dEnv, sqlDb) - if err != nil { - return nil, fmt.Errorf("failed to restart collection: %w", err) +// statsWait blocks until the job queue executes two full loops +// of instructions, which will (1) pick up and (2) commit new +// sets of index-bucket dependencies. +func statsWait(ctx *sql.Context, _ ...string) (interface{}, error) { + dSess := dsess.DSessFromSess(ctx.Session) + pro := dSess.StatsProvider() + if afp, ok := pro.(ToggableStats); ok { + if err := afp.WaitForDbSync(ctx); err != nil { + return nil, err } - return fmt.Sprintf("restarted stats collection: %s", ref.StatsRef{}.String()), nil + return OkResult, nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ToggableStats") } -// statsStatus returns the last update for a stats thread -func statsStatus(ctx *sql.Context) (interface{}, error) { +// statsGc rewrites the cache to only include objects reachable +// by the current root value. +func statsGc(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - dbName := strings.ToLower(ctx.GetCurrentDatabase()) pro := dSess.StatsProvider() - if afp, ok := pro.(AutoRefreshStatsProvider); ok { - return afp.ThreadStatus(dbName), nil + if afp, ok := pro.(ToggableStats); ok { + if err := afp.Gc(ctx); err != nil { + return nil, err + } + return OkResult, nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ToggableStats") } -// statsStop cancels a refresh thread -func statsStop(ctx *sql.Context) (interface{}, error) { +// statsStop flushes the job queue and leaves the stats provider +// in a paused state. +func statsStop(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) statsPro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) - if afp, ok := statsPro.(AutoRefreshStatsProvider); ok { - afp.CancelRefreshThread(dbName) - return fmt.Sprintf("stopped thread: %s", dbName), nil + if afp, ok := statsPro.(ToggableStats); ok { + if err := afp.Stop(ctx); err != nil { + return nil, err + } + return OkResult, nil } - return nil, fmt.Errorf("provider does not implement AutoRefreshStatsProvider") + return nil, fmt.Errorf("provider does not implement ToggableStats") } -// statsDrop deletes the stats ref -func statsDrop(ctx *sql.Context) (interface{}, error) { +// statsPurge flushes the job queue, deletes the current caches +// and storage targets, re-initializes the tracked database +// states, and returns with stats collection paused. +func statsPurge(ctx *sql.Context, _ ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - pro := dSess.StatsProvider() - dbName := strings.ToLower(ctx.GetCurrentDatabase()) + pro, ok := dSess.StatsProvider().(ToggableStats) + if !ok { + return nil, fmt.Errorf("stats not persisted, cannot purge") + } - branch, err := dSess.GetBranch() + err := pro.Stop(ctx) if err != nil { - return nil, fmt.Errorf("failed to drop stats: %w", err) + return nil, fmt.Errorf("failed to flush queue: %w", err) } - if afp, ok := pro.(AutoRefreshStatsProvider); ok { - // currently unsafe to drop stats while running refresh - afp.CancelRefreshThread(dbName) - } - if bsp, ok := pro.(BranchStatsProvider); ok { - err := bsp.DropBranchDbStats(ctx, branch, dbName, true) - if err != nil { - return nil, fmt.Errorf("failed to drop stats: %w", err) - } + if err := pro.Purge(ctx); err != nil { + return "failed to purge stats", err } - return fmt.Sprintf("deleted stats ref for %s", dbName), nil + return OkResult, nil } -// statsPrune replaces the current disk contents with only the currently -// tracked in memory statistics. -func statsPrune(ctx *sql.Context) (interface{}, error) { +// statsTimers updates the stats timers, which go into effect after the next restart. +func statsTimers(ctx *sql.Context, args ...string) (interface{}, error) { dSess := dsess.DSessFromSess(ctx.Session) - pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider) - if !ok { - return nil, fmt.Errorf("stats not persisted, cannot purge") + statsPro := dSess.StatsProvider() + + if len(args) != 3 { + return nil, fmt.Errorf("expected timer arguments (ns): (job, gc, sync)") } - if err := pro.Prune(ctx); err != nil { - return "failed to prune stats databases", err + job, err := strconv.ParseInt(args[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("interval timer must be positive intergers") } - return "pruned all stats databases", nil -} - -// statsPurge removes the stats database from disk -func statsPurge(ctx *sql.Context) (interface{}, error) { - dSess := dsess.DSessFromSess(ctx.Session) - pro, ok := dSess.StatsProvider().(AutoRefreshStatsProvider) - if !ok { - return nil, fmt.Errorf("stats not persisted, cannot purge") + gc, err := strconv.ParseInt(args[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("interval timer must be positive intergers") } - if err := pro.Purge(ctx); err != nil { - return "failed to purged databases", err + sync, err := strconv.ParseInt(args[2], 10, 64) + if err != nil { + return nil, fmt.Errorf("interval arguments must be positive intergers") + } + + if afp, ok := statsPro.(ToggableStats); ok { + afp.SetTimers(job, gc, sync) + return OkResult, nil } - return "purged all database stats", nil + return nil, fmt.Errorf("provider does not implement ToggableStats") } diff --git a/go/libraries/doltcore/sqle/dsess/session_db_provider.go b/go/libraries/doltcore/sqle/dsess/session_db_provider.go index 3d4969bb114..05e72971747 100644 --- a/go/libraries/doltcore/sqle/dsess/session_db_provider.go +++ b/go/libraries/doltcore/sqle/dsess/session_db_provider.go @@ -122,6 +122,7 @@ type SqlDatabase interface { sql.Database sql.SchemaDatabase sql.DatabaseSchema + sql.AliasedDatabase SessionDatabase RevisionDatabase diff --git a/go/libraries/doltcore/sqle/dsess/variables.go b/go/libraries/doltcore/sqle/dsess/variables.go index 848ed2218ec..0d8e0fd4edb 100644 --- a/go/libraries/doltcore/sqle/dsess/variables.go +++ b/go/libraries/doltcore/sqle/dsess/variables.go @@ -59,12 +59,12 @@ const ( DoltClusterRoleEpochVariable = "dolt_cluster_role_epoch" DoltClusterAckWritesTimeoutSecs = "dolt_cluster_ack_writes_timeout_secs" - DoltStatsAutoRefreshEnabled = "dolt_stats_auto_refresh_enabled" - DoltStatsBootstrapEnabled = "dolt_stats_bootstrap_enabled" - DoltStatsAutoRefreshThreshold = "dolt_stats_auto_refresh_threshold" - DoltStatsAutoRefreshInterval = "dolt_stats_auto_refresh_interval" - DoltStatsMemoryOnly = "dolt_stats_memory_only" - DoltStatsBranches = "dolt_stats_branches" + DoltStatsEnabled = "dolt_stats_enabled" + DoltStatsMemoryOnly = "dolt_stats_memory_only" + DoltStatsBranches = "dolt_stats_branches" + DoltStatsJobInterval = "dolt_stats_job_interval" + DoltStatsBranchInterval = "dolt_stats_branch_interval" + DoltStatsGCInterval = "dolt_stats_gc_interval" ) const URLTemplateDatabasePlaceholder = "{database}" diff --git a/go/libraries/doltcore/sqle/dtables/statistics_table.go b/go/libraries/doltcore/sqle/dtables/statistics_table.go index fda463e7e49..f73cfaf192b 100644 --- a/go/libraries/doltcore/sqle/dtables/statistics_table.go +++ b/go/libraries/doltcore/sqle/dtables/statistics_table.go @@ -68,7 +68,7 @@ func (st *StatisticsTable) DataLength(ctx *sql.Context) (uint64, error) { } type BranchStatsProvider interface { - GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) + GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) } // RowCount implements sql.StatisticsTable @@ -119,14 +119,19 @@ func (st *StatisticsTable) Partitions(*sql.Context) (sql.PartitionIter, error) { // PartitionRows is a sql.Table interface function that gets a row iterator for a partition func (st *StatisticsTable) PartitionRows(ctx *sql.Context, _ sql.Partition) (sql.RowIter, error) { dSess := dsess.DSessFromSess(ctx.Session) - statsPro := dSess.StatsProvider().(BranchStatsProvider) + statsPro, ok := dSess.StatsProvider().(BranchStatsProvider) + if !ok { + return sql.RowsToRowIter(), nil + } var dStats []sql.Statistic for _, table := range st.tableNames { dbStats, err := statsPro.GetTableDoltStats(ctx, st.branch, st.dbName, st.schemaName, table) if err != nil { return nil, err } - dStats = append(dStats, dbStats...) + for _, s := range dbStats { + dStats = append(dStats, s) + } } return stats.NewStatsIter(ctx, dStats...) } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index 0c76b18fa51..903408a5f62 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -1452,11 +1452,6 @@ func TestStatBranchTests(t *testing.T) { RunStatBranchTests(t, harness) } -func TestStatsFunctions(t *testing.T) { - harness := newDoltEnginetestHarness(t) - RunStatsFunctionsTest(t, harness) -} - func TestDiffTableFunction(t *testing.T) { harness := newDoltEnginetestHarness(t) RunDiffTableFunctionTests(t, harness) @@ -1663,11 +1658,6 @@ func TestStatsStorage(t *testing.T) { RunStatsStorageTests(t, h) } -func TestStatsIOWithoutReload(t *testing.T) { - h := newDoltEnginetestHarness(t) - RunStatsIOTestsWithoutReload(t, h) -} - func TestJoinStats(t *testing.T) { h := newDoltEnginetestHarness(t) RunJoinStatsTests(t, h) @@ -1953,22 +1943,23 @@ func TestStatsAutoRefreshConcurrency(t *testing.T) { // Setting an interval of 0 and a threshold of 0 will result // in the stats being updated after every operation - intervalSec := time.Duration(0) - thresholdf64 := 0. - bThreads := sql.NewBackgroundThreads() - branches := []string{"main"} - statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.Provider) + //intervalSec := time.Duration(0) + //thresholdf64 := 0. + //bThreads := sql.NewBackgroundThreads() + //branches := []string{"main"} + statsProv := engine.EngineAnalyzer().Catalog.StatsProvider.(*statspro.StatsCoord) // it is important to use new sessions for this test, to avoid working root conflicts readCtx := enginetest.NewSession(harness) writeCtx := enginetest.NewSession(harness) refreshCtx := enginetest.NewSession(harness) - newCtx := func(context.Context) (*sql.Context, error) { - return refreshCtx, nil - } - err := statsProv.InitAutoRefreshWithParams(newCtx, sqlDb.Name(), bThreads, intervalSec, thresholdf64, branches) + fs, err := engine.EngineAnalyzer().Catalog.DbProvider.(*sqle.DoltDatabaseProvider).FileSystemForDatabase(sqlDb.AliasedName()) + require.NoError(t, err) + + statsProv.AddFs(sqlDb, fs) require.NoError(t, err) + <-done execQ := func(ctx *sql.Context, q string, id int, tag string) { _, iter, _, err := engine.Query(ctx, q) diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go index efd221635f4..0747f743b1b 100755 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_tests.go @@ -268,7 +268,6 @@ func RunQueryTestPlans(t *testing.T, harness DoltEnginetestHarness) { } defer harness.Close() - sql.SystemVariables.SetGlobal(dsess.DoltStatsBootstrapEnabled, 0) enginetest.TestQueryPlans(t, harness, queries.PlanTests) } @@ -1165,21 +1164,6 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) enginetest.QueryEngine { return e } -func RunStatsFunctionsTest(t *testing.T, harness DoltEnginetestHarness) { - defer harness.Close() - for _, test := range StatProcTests { - t.Run(test.Name, func(t *testing.T) { - // reset engine so provider statistics are clean - harness = harness.NewHarness(t).WithConfigureStats(true) - harness.Setup(setup.MydbData) - harness.SkipSetupCommit() - e := mustNewEngine(t, harness) - defer e.Close() - enginetest.TestScriptWithEngine(t, e, harness, test) - }) - } -} - func RunDiffTableFunctionTests(t *testing.T, harness DoltEnginetestHarness) { for _, test := range DiffTableFunctionScriptTests { t.Run(test.Name, func(t *testing.T) { @@ -1562,27 +1546,12 @@ func RunStatsStorageTests(t *testing.T, h DoltEnginetestHarness) { for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) { func() { h = h.NewHarness(t).WithConfigureStats(true) - defer h.Close() e := mustNewEngine(t, h) if enginetest.IsServerEngine(e) { return } defer e.Close() - TestProviderReloadScriptWithEngine(t, e, h, script) - }() - } -} - -func RunStatsIOTestsWithoutReload(t *testing.T, h DoltEnginetestHarness) { - for _, script := range append(DoltStatsStorageTests, DoltHistogramTests...) { - func() { - h = h.NewHarness(t).WithConfigureStats(true) defer h.Close() - e := mustNewEngine(t, h) - if enginetest.IsServerEngine(e) { - return - } - defer e.Close() enginetest.TestScriptWithEngine(t, e, h, script) }() } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go index 4dbcd2be283..20bd5de519e 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go @@ -20,6 +20,7 @@ import ( "runtime" "strings" "testing" + "time" gms "github.com/dolthub/go-mysql-server" "github.com/dolthub/go-mysql-server/enginetest" @@ -36,7 +37,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/kvexec" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -46,7 +46,7 @@ import ( type DoltHarness struct { t *testing.T provider dsess.DoltDatabaseProvider - statsPro sql.StatsProvider + statsPro *statspro.StatsCoord multiRepoEnv *env.MultiRepoEnv session *dsess.DoltSession branchControl *branch_control.Controller @@ -246,13 +246,20 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { d.gcSafepointController = dsess.NewGCSafepointController() - statsProv := statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) - d.statsPro = statsProv - var err error d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, d.gcSafepointController) require.NoError(t, err) + sqlCtx := enginetest.NewContext(d) + bThreads := sql.NewBackgroundThreads() + + ctxGen := func(ctx context.Context) (*sql.Context, error) { + return d.NewContextWithClient(sql.Client{Address: "localhost", User: "root"}), nil + } + statsPro := statspro.NewStatsCoord(ctx, doltProvider, ctxGen, sqlCtx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase())) + statsPro.SetTimers(int64(1*time.Nanosecond), int64(1*time.Second), int64(1*time.Second)) + d.statsPro = statsPro + e, err := enginetest.NewEngine(t, d, d.provider, d.setupData, d.statsPro) if err != nil { return nil, err @@ -260,8 +267,8 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { e.Analyzer.ExecBuilder = rowexec.NewOverrideBuilder(kvexec.Builder{}) d.engine = e - sqlCtx := enginetest.NewContext(d) databases := pro.AllDatabases(sqlCtx) + d.setupDbs = make(map[string]struct{}) var dbs []string for _, db := range databases { @@ -281,23 +288,21 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { require.NoError(t, err) } - if d.configureStats { - bThreads := sql.NewBackgroundThreads() - e = e.WithBackgroundThreads(bThreads) + e = e.WithBackgroundThreads(bThreads) - dSess := dsess.DSessFromSess(sqlCtx.Session) - dbCache := dSess.DatabaseCache(sqlCtx) - - dsessDbs := make([]dsess.SqlDatabase, len(dbs)) - for i, dbName := range dbs { - dsessDbs[i], _ = dbCache.GetCachedRevisionDb(fmt.Sprintf("%s/main", dbName), dbName) + if d.configureStats { + var dsessDbs []dsess.SqlDatabase + for _, db := range databases { + if sqlDb, ok := db.(dsess.SqlDatabase); ok { + dsessDbs = append(dsessDbs, sqlDb) + } } - - ctxFact := func(context.Context) (*sql.Context, error) { - sess := d.newSessionWithClient(sql.Client{Address: "localhost", User: "root"}) - return sql.NewContext(context.Background(), sql.WithSession(sess)), nil + if err := statsPro.Init(ctx, dsessDbs, false); err != nil { + return nil, err } - if err = statsProv.Configure(sqlCtx, ctxFact, bThreads, dsessDbs); err != nil { + + err = statsPro.Restart(ctx) + if err != nil { return nil, err } @@ -309,13 +314,20 @@ func (d *DoltHarness) NewEngine(t *testing.T) (enginetest.QueryEngine, error) { } // Reset the mysql DB table to a clean state for this new engine + ctx := enginetest.NewContext(d) + d.engine.Analyzer.Catalog.MySQLDb = mysql_db.CreateEmptyMySQLDb() d.engine.Analyzer.Catalog.MySQLDb.AddRootAccount() - d.engine.Analyzer.Catalog.StatsProvider = statspro.NewProvider(d.provider.(*sqle.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) - var err error - sqlCtx := enginetest.NewContext(d) - e, err := enginetest.RunSetupScripts(sqlCtx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation()) + ctxGen := func(ctx context.Context) (*sql.Context, error) { + return d.NewContext(), nil + } + bThreads := sql.NewBackgroundThreads() + statsPro := statspro.NewStatsCoord(ctx, d.provider.(*sqle.DoltDatabaseProvider), ctxGen, ctx.Session.GetLogger().Logger, bThreads, d.multiRepoEnv.GetEnv(d.multiRepoEnv.GetFirstDatabase())) + require.NoError(t, statsPro.Restart(ctx)) + d.engine.Analyzer.Catalog.StatsProvider = statsPro + + e, err := enginetest.RunSetupScripts(ctx, d.engine, d.resetScripts(), d.SupportsNativeIndexCreation()) // Get a fresh session after running setup scripts, since some setup scripts can change the session state d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), d.provider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil) @@ -430,7 +442,6 @@ func (d *DoltHarness) NewDatabases(names ...string) []sql.Database { doltProvider, ok := pro.(*sqle.DoltDatabaseProvider) require.True(d.t, ok) d.provider = doltProvider - d.statsPro = statspro.NewProvider(doltProvider, statsnoms.NewNomsStatsFactory(d.multiRepoEnv.RemoteDialProvider())) var err error d.session, err = dsess.NewDoltSession(enginetest.NewBaseSession(), doltProvider, d.multiRepoEnv.Config(), d.branchControl, d.statsPro, writer.NewWriteSession, nil) @@ -502,7 +513,10 @@ func (d *DoltHarness) NewDatabaseProvider() sql.MutableDatabaseProvider { func (d *DoltHarness) Close() { d.closeProvider() - sql.SystemVariables.SetGlobal(dsess.DoltStatsAutoRefreshEnabled, int8(0)) + if d.statsPro != nil { + d.statsPro.Close() + } + sql.SystemVariables.SetGlobal(dsess.DoltStatsEnabled, int8(0)) } func (d *DoltHarness) closeProvider() { diff --git a/go/libraries/doltcore/sqle/enginetest/stats_queries.go b/go/libraries/doltcore/sqle/enginetest/stats_queries.go index fedb7297d5f..3efc0a41288 100644 --- a/go/libraries/doltcore/sqle/enginetest/stats_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/stats_queries.go @@ -17,17 +17,12 @@ package enginetest import ( "fmt" "strings" - "testing" - gms "github.com/dolthub/go-mysql-server" - "github.com/dolthub/go-mysql-server/enginetest" "github.com/dolthub/go-mysql-server/enginetest/queries" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" - "github.com/stretchr/testify/require" "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" ) // fillerVarchar pushes the tree into level 3 @@ -510,8 +505,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{ { Name: "incremental stats deletes auto", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", "insert into xy select x, 1, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", "analyze table xy", @@ -525,10 +518,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{ Query: "delete from xy where x > 500", }, { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", + Query: "analyze table xy", }, { Query: "select count(*) from dolt_statistics group by table_name, index_name", @@ -540,8 +530,6 @@ var DoltStatsStorageTests = []queries.ScriptTest{ // https://github.com/dolthub/dolt/issues/8504 Name: "alter index column type", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y varchar(16))", "insert into xy values (0,'0'), (1,'1'), (2,'2')", "analyze table xy", @@ -569,78 +557,9 @@ var DoltStatsStorageTests = []queries.ScriptTest{ }, }, }, - { - Name: "differentiate table cases", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main'", - "CREATE table XY (x bigint primary key, y varchar(16))", - "insert into XY values (0,'0'), (1,'1'), (2,'2')", - "analyze table XY", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - }, - }, - { - Name: "deleted table loads OK", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main'", - "CREATE table xy (x bigint primary key, y varchar(16))", - "insert into xy values (0,'0'), (1,'1'), (2,'2')", - "analyze table xy", - "CREATE table uv (u bigint primary key, v varchar(16))", - "insert into uv values (0,'0'), (1,'1'), (2,'2')", - "analyze table uv", - "drop table uv", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - }, - }, - { - Name: "differentiate branch names", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches ='main,feat'", - "CREATE table xy (x bigint primary key, y varchar(16))", - "insert into xy values (0,'0'), (1,'1'), (2,'2')", - "analyze table xy", - "call dolt_checkout('-b', 'feat')", - "CREATE table xy (x varchar(16) primary key, y bigint, z bigint)", - "insert into xy values (3,'3',3)", - "analyze table xy", - "call dolt_checkout('main')", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "2"}}, - }, - { - Query: "call dolt_checkout('feat')", - }, - { - Query: "select table_name, upper_bound from dolt_statistics", - Expected: []sql.Row{{"xy", "3"}}, - }, - }, - }, { Name: "drop primary key", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", "CREATE table xy (x bigint primary key, y varchar(16))", "insert into xy values (0,'0'), (1,'1'), (2,'2')", "analyze table xy", @@ -657,10 +576,7 @@ var DoltStatsStorageTests = []queries.ScriptTest{ Query: "insert into xy values ('3', '3')", }, { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.2)", + Query: "analyze table xy", }, { Query: "select count(*) from dolt_statistics group by table_name, index_name", @@ -674,9 +590,6 @@ var StatBranchTests = []queries.ScriptTest{ { Name: "multi branch stats", SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0;", - "set @@PERSIST.dolt_stats_branches = 'main,feat';", "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", "insert into xy values (0,0,'a'), (1,0,'a'), (2,0,'a'), (3,0,'a'), (4,1,'a'), (5,2,'a')", "call dolt_commit('-Am', 'xy')", @@ -688,10 +601,7 @@ var StatBranchTests = []queries.ScriptTest{ }, Assertions: []queries.ScriptTestAssertion{ { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", + Query: "call dolt_stats_sync()", }, { Query: "select table_name, index_name, row_count from dolt_statistics", @@ -726,7 +636,7 @@ var StatBranchTests = []queries.ScriptTest{ Query: "call dolt_commit('-am', 'cm')", }, { - Query: "select sleep(.1)", + Query: "call dolt_stats_wait()", }, { Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", @@ -744,30 +654,6 @@ var StatBranchTests = []queries.ScriptTest{ {"xy", "y", uint64(6)}, }, }, - { - Query: "call dolt_checkout('feat')", - }, - { - Query: "call dolt_stats_stop()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_drop()", - }, - { - Query: "select table_name, index_name, row_count from dolt_statistics as of 'feat'", - Expected: []sql.Row{}, - }, - { - // we dropped 'feat', not 'main' - Query: "select table_name, index_name, row_count from dolt_statistics as of 'main'", - Expected: []sql.Row{ - {"xy", "primary", uint64(6)}, - {"xy", "y", uint64(6)}, - }, - }, }, }, { @@ -787,302 +673,3 @@ var StatBranchTests = []queries.ScriptTest{ }, }, } - -var StatProcTests = []queries.ScriptTest{ - { - Name: "deleting stats removes information_schema access point", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (0,0,0)", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "analyze table xy", - }, - { - Query: "select count(*) from information_schema.column_statistics", - Expected: []sql.Row{{2}}, - }, - { - Query: "call dolt_stats_drop()", - }, - { - Query: "select count(*) from information_schema.column_statistics", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "restart empty stats panic", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "analyze table xy", - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - { - Query: "set @@GLOBAL.dolt_stats_auto_refresh_threshold = 0", - Expected: []sql.Row{{}}, - }, - { - Query: "set @@GLOBAL.dolt_stats_auto_refresh_interval = 0", - Expected: []sql.Row{{}}, - }, - { - // don't panic - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "insert into xy values (0,0,0)", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - }, - }, - { - Name: "basic start, status, stop loop", - SetUpScript: []string{ - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (0,0,'a'), (2,0,'a'), (4,1,'a'), (6,2,'a')", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"no active stats thread"}}, - }, - // set refresh interval arbitrarily high to avoid updating when we restart - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 100000;", - Expected: []sql.Row{{}}, - }, - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_threshold = 0", - Expected: []sql.Row{{}}, - }, - { - Query: "call dolt_stats_restart()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"restarted thread: mydb"}}, - }, - { - Query: "set @@PERSIST.dolt_stats_auto_refresh_interval = 0;", - Expected: []sql.Row{{}}, - }, - // new restart picks up 0-interval, will start refreshing immediately - { - Query: "call dolt_stats_restart()", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"refreshed mydb"}}, - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - // kill refresh thread - { - Query: "call dolt_stats_stop()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"cancelled thread: mydb"}}, - }, - // insert without refresh thread will not update stats - { - Query: "insert into xy values (1,0,'a'), (3,0,'a'), (5,2,'a'), (7,1,'a')", - }, - { - Query: "select sleep(.1)", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"cancelled thread: mydb"}}, - }, - // manual analyze will update stats - { - Query: "analyze table xy", - Expected: []sql.Row{{"xy", "analyze", "status", "OK"}}, - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"refreshed mydb"}}, - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{2}}, - }, - // kill refresh thread and delete stats ref - { - Query: "call dolt_stats_drop()", - }, - { - Query: "call dolt_stats_status()", - Expected: []sql.Row{{"dropped"}}, - }, - { - Query: "select count(*) from dolt_statistics", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "test purge", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;", - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');", - "analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", - Expected: []sql.Row{{1}, {1}}, - }, - { - Query: "call dolt_stats_purge()", - }, - { - Query: "select count(*) from dolt_statistics;", - Expected: []sql.Row{{0}}, - }, - }, - }, - { - Name: "test prune", - SetUpScript: []string{ - "set @@PERSIST.dolt_stats_auto_refresh_enabled = 0;", - "CREATE table xy (x bigint primary key, y int, z varchar(500), key(y,z));", - "insert into xy values (1, 1, 'a'), (2,1,'a'), (3,1,'a'), (4,2,'b'), (5,2,'b'), (6,3,'c');", - "analyze table xy", - }, - Assertions: []queries.ScriptTestAssertion{ - { - Query: "select count(*) as cnt from dolt_statistics group by table_name, index_name order by cnt", - Expected: []sql.Row{{1}, {1}}, - }, - { - Query: "call dolt_stats_prune()", - }, - { - Query: "select count(*) from dolt_statistics;", - Expected: []sql.Row{{2}}, - }, - }, - }, -} - -// TestProviderReloadScriptWithEngine runs the test script given with the engine provided. -func TestProviderReloadScriptWithEngine(t *testing.T, e enginetest.QueryEngine, harness enginetest.Harness, script queries.ScriptTest) { - ctx := enginetest.NewContext(harness) - err := enginetest.CreateNewConnectionForServerEngine(ctx, e) - require.NoError(t, err, nil) - - t.Run(script.Name, func(t *testing.T) { - for _, statement := range script.SetUpScript { - if sh, ok := harness.(enginetest.SkippingHarness); ok { - if sh.SkipQueryTest(statement) { - t.Skip() - } - } - ctx = ctx.WithQuery(statement) - enginetest.RunQueryWithContext(t, e, harness, ctx, statement) - } - - assertions := script.Assertions - if len(assertions) == 0 { - assertions = []queries.ScriptTestAssertion{ - { - Query: script.Query, - Expected: script.Expected, - ExpectedErr: script.ExpectedErr, - ExpectedIndexes: script.ExpectedIndexes, - }, - } - } - - { - // reload provider, get disk stats - eng, ok := e.(*gms.Engine) - if !ok { - t.Errorf("expected *gms.Engine but found: %T", e) - } - - branches := eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).TrackedBranches("mydb") - brCopy := make([]string, len(branches)) - copy(brCopy, branches) - err := eng.Analyzer.Catalog.StatsProvider.DropDbStats(ctx, "mydb", false) - require.NoError(t, err) - for _, branch := range brCopy { - err = eng.Analyzer.Catalog.StatsProvider.(*statspro.Provider).LoadStats(ctx, "mydb", branch) - require.NoError(t, err) - } - } - - for _, assertion := range assertions { - t.Run(assertion.Query, func(t *testing.T) { - if assertion.NewSession { - th, ok := harness.(enginetest.TransactionHarness) - require.True(t, ok, "ScriptTestAssertion requested a NewSession, "+ - "but harness doesn't implement TransactionHarness") - ctx = th.NewSession() - } - - if sh, ok := harness.(enginetest.SkippingHarness); ok && sh.SkipQueryTest(assertion.Query) { - t.Skip() - } - if assertion.Skip { - t.Skip() - } - - if assertion.ExpectedErr != nil { - enginetest.AssertErr(t, e, harness, assertion.Query, nil, assertion.ExpectedErr) - } else if assertion.ExpectedErrStr != "" { - enginetest.AssertErrWithCtx(t, e, harness, ctx, assertion.Query, nil, nil, assertion.ExpectedErrStr) - } else if assertion.ExpectedWarning != 0 { - enginetest.AssertWarningAndTestQuery(t, e, nil, harness, assertion.Query, - assertion.Expected, nil, assertion.ExpectedWarning, assertion.ExpectedWarningsCount, - assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck) - } else if assertion.SkipResultsCheck { - enginetest.RunQueryWithContext(t, e, harness, nil, assertion.Query) - } else if assertion.CheckIndexedAccess { - enginetest.TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings) - } else { - var expected = assertion.Expected - if enginetest.IsServerEngine(e) && assertion.SkipResultCheckOnServerEngine { - // TODO: remove this check in the future - expected = nil - } - enginetest.TestQueryWithContext(t, ctx, e, harness, assertion.Query, expected, assertion.ExpectedColumns, assertion.Bindings, nil) - } - }) - } - }) -} - -func mustNewStatQual(s string) sql.StatQualifier { - qual, _ := sql.NewQualifierFromString(s) - return qual -} diff --git a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go index 1bd7861ebaf..ac93e62d733 100644 --- a/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go +++ b/go/libraries/doltcore/sqle/logictest/dolt/doltharness.go @@ -33,7 +33,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/env" dsql "github.com/dolthub/dolt/go/libraries/doltcore/sqle" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statsnoms" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -145,7 +144,7 @@ func innerInit(h *DoltHarness, dEnv *env.DoltEnv) error { } config, _ := dEnv.Config.GetConfig(env.GlobalConfig) - sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.NewProvider(pro.(*dsql.DoltDatabaseProvider), statsnoms.NewNomsStatsFactory(env.NewGRPCDialProviderFromDoltEnv(dEnv))), dsess.NewGCSafepointController()) + sqlCtx := dsql.NewTestSQLCtxWithProvider(ctx, pro, config, statspro.StatsNoop{}, dsess.NewGCSafepointController()) h.sess = sqlCtx.Session.(*dsess.DoltSession) dbs := h.engine.Analyzer.Catalog.AllDatabases(sqlCtx) diff --git a/go/libraries/doltcore/sqle/sqlddl_test.go b/go/libraries/doltcore/sqle/sqlddl_test.go index e0cea917018..7088079dd86 100644 --- a/go/libraries/doltcore/sqle/sqlddl_test.go +++ b/go/libraries/doltcore/sqle/sqlddl_test.go @@ -1128,6 +1128,7 @@ func newTestEngine(ctx context.Context, dEnv *env.DoltEnv) (*gms.Engine, *sql.Co IsServerLocked: false, }), sqlCtx } + func TestIndexOverwrite(t *testing.T) { ctx := context.Background() dEnv := dtestutils.CreateTestEnv() diff --git a/go/libraries/doltcore/sqle/statsnoms/database.go b/go/libraries/doltcore/sqle/statsnoms/database.go deleted file mode 100644 index 6a972a3b103..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/database.go +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "context" - "errors" - "fmt" - "path" - "strings" - "sync" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" - "github.com/dolthub/dolt/go/libraries/utils/earl" - "github.com/dolthub/dolt/go/libraries/utils/filesys" - "github.com/dolthub/dolt/go/store/datas" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/types" -) - -func NewNomsStatsFactory(dialPro dbfactory.GRPCDialProvider) *NomsStatsFactory { - return &NomsStatsFactory{dialPro: dialPro} -} - -type NomsStatsFactory struct { - dialPro dbfactory.GRPCDialProvider -} - -var _ statspro.StatsFactory = NomsStatsFactory{} - -func (sf NomsStatsFactory) Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (statspro.Database, error) { - params := make(map[string]interface{}) - params[dbfactory.GRPCDialProviderParam] = sf.dialPro - - var urlPath string - u, err := earl.Parse(prov.DbFactoryUrl()) - if u.Scheme == dbfactory.MemScheme { - urlPath = path.Join(prov.DbFactoryUrl(), dbfactory.DoltDataDir) - } else if u.Scheme == dbfactory.FileScheme { - urlPath = doltdb.LocalDirDoltDB - } - - statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) - if err != nil { - return nil, err - } - - var dEnv *env.DoltEnv - exists, isDir := statsFs.Exists("") - if !exists { - err := statsFs.MkDirs("") - if err != nil { - return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error()) - } - - dEnv = env.Load(context.Background(), hdp, statsFs, urlPath, "test") - sess := dsess.DSessFromSess(ctx.Session) - err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), prov.DefaultBranch()) - if err != nil { - return nil, err - } - } else if !isDir { - return nil, fmt.Errorf("file exists where the dolt stats directory should be") - } else { - dEnv = env.LoadWithoutDB(ctx, hdp, statsFs, "", "") - } - - dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params) - - deaf := dEnv.DbEaFactory(ctx) - - tmpDir, err := dEnv.TempTableFilesDir() - if err != nil { - return nil, err - } - opts := editor.Options{ - Deaf: deaf, - Tempdir: tmpDir, - } - statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts) - if err != nil { - return nil, err - } - return NewNomsStats(sourceDb, statsDb), nil -} - -func NewNomsStats(sourceDb, statsDb dsess.SqlDatabase) *NomsStatsDatabase { - return &NomsStatsDatabase{mu: &sync.Mutex{}, destDb: statsDb, sourceDb: sourceDb} -} - -type dbStats map[sql.StatQualifier]*statspro.DoltStats - -type NomsStatsDatabase struct { - mu *sync.Mutex - destDb dsess.SqlDatabase - sourceDb dsess.SqlDatabase - stats []dbStats - branches []string - tableHashes []map[string]hash.Hash - schemaHashes []map[string]hash.Hash - dirty []*prolly.MutableMap -} - -var _ statspro.Database = (*NomsStatsDatabase)(nil) - -func (n *NomsStatsDatabase) Close() error { - return n.destDb.DbData().Ddb.Close() -} - -func (n *NomsStatsDatabase) Branches() []string { - return n.branches -} - -func (n *NomsStatsDatabase) LoadBranchStats(ctx *sql.Context, branch string) error { - branchQDbName := statspro.BranchQualifiedDatabase(n.sourceDb.Name(), branch) - - dSess := dsess.DSessFromSess(ctx.Session) - sqlDb, err := dSess.Provider().Database(ctx, branchQDbName) - if err != nil { - ctx.GetLogger().Debugf("statistics load: branch not found: %s; `call dolt_stats_prune()` to delete stale statistics", branch) - return nil - } - branchQDb, ok := sqlDb.(dsess.SqlDatabase) - if !ok { - return fmt.Errorf("branch/database not found: %s", branchQDbName) - } - - if ok, err := n.SchemaChange(ctx, branch, branchQDb); err != nil { - return err - } else if ok { - ctx.GetLogger().Debugf("statistics load: detected schema change incompatility, purging %s/%s", branch, n.sourceDb.Name()) - if err := n.DeleteBranchStats(ctx, branch, true); err != nil { - return err - } - } - - statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, branch) - if errors.Is(err, doltdb.ErrNoStatistics) { - return n.trackBranch(ctx, branch) - } else if errors.Is(err, datas.ErrNoBranchStats) { - return n.trackBranch(ctx, branch) - } else if err != nil { - return err - } - if cnt, err := statsMap.Count(); err != nil { - return err - } else if cnt == 0 { - return n.trackBranch(ctx, branch) - } - - doltStats, err := loadStats(ctx, branchQDb, statsMap) - if err != nil { - return err - } - n.branches = append(n.branches, branch) - n.stats = append(n.stats, doltStats) - n.dirty = append(n.dirty, nil) - n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash)) - n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash)) - return nil -} - -func (n *NomsStatsDatabase) SchemaChange(ctx *sql.Context, branch string, branchQDb dsess.SqlDatabase) (bool, error) { - root, err := branchQDb.GetRoot(ctx) - if err != nil { - return false, err - } - tables, err := branchQDb.GetTableNames(ctx) - if err != nil { - return false, err - } - - var keys []string - var schHashes []hash.Hash - for _, tableName := range tables { - table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tableName}) - if err != nil { - return false, err - } - if !ok { - return false, nil - } - curHash, err := table.GetSchemaHash(ctx) - if err != nil { - return false, err - } - - keys = append(keys, n.schemaTupleKey(branch, tableName)) - schHashes = append(schHashes, curHash) - } - - ddb := n.destDb.DbData().Ddb - var schemaChange bool - for i, key := range keys { - curHash := schHashes[i] - if val, ok, err := ddb.GetTuple(ctx, key); err != nil { - return false, err - } else if ok { - oldHash := hash.Parse(string(val)) - if !ok || !oldHash.Equal(curHash) { - schemaChange = true - break - } - } - } - if schemaChange { - for _, key := range keys { - ddb.DeleteTuple(ctx, key) - } - return true, nil - } - return false, nil -} - -func (n *NomsStatsDatabase) getBranchStats(branch string) dbStats { - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - return n.stats[i] - } - } - return nil -} - -func (n *NomsStatsDatabase) GetStat(branch string, qual sql.StatQualifier) (*statspro.DoltStats, bool) { - n.mu.Lock() - defer n.mu.Unlock() - stats := n.getBranchStats(branch) - ret, ok := stats[qual] - return ret, ok -} - -func (n *NomsStatsDatabase) ListStatQuals(branch string) []sql.StatQualifier { - n.mu.Lock() - defer n.mu.Unlock() - stats := n.getBranchStats(branch) - var ret []sql.StatQualifier - for qual, _ := range stats { - ret = append(ret, qual) - } - return ret -} - -func (n *NomsStatsDatabase) setStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error { - var statsMap *prolly.MutableMap - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - n.stats[i][qual] = stats - if n.dirty[i] == nil { - if err := n.initMutable(ctx, i); err != nil { - return err - } - } - statsMap = n.dirty[i] - } - } - if statsMap == nil { - if err := n.trackBranch(ctx, branch); err != nil { - return err - } - statsMap = n.dirty[len(n.branches)-1] - n.stats[len(n.branches)-1][qual] = stats - } - - return n.replaceStats(ctx, statsMap, stats) -} -func (n *NomsStatsDatabase) SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *statspro.DoltStats) error { - n.mu.Lock() - defer n.mu.Unlock() - - return n.setStat(ctx, branch, qual, stats) -} - -func (n *NomsStatsDatabase) trackBranch(ctx context.Context, branch string) error { - n.branches = append(n.branches, branch) - n.stats = append(n.stats, make(dbStats)) - n.tableHashes = append(n.tableHashes, make(map[string]hash.Hash)) - n.schemaHashes = append(n.schemaHashes, make(map[string]hash.Hash)) - - kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors() - newMap, err := prolly.NewMapFromTuples(ctx, n.destDb.DbData().Ddb.NodeStore(), kd, vd) - if err != nil { - return err - } - n.dirty = append(n.dirty, newMap.Mutate()) - return n.destDb.DbData().Ddb.SetStatisics(ctx, branch, newMap.HashOf()) -} - -func (n *NomsStatsDatabase) initMutable(ctx context.Context, i int) error { - statsMap, err := n.destDb.DbData().Ddb.GetStatistics(ctx, n.branches[i]) - if err != nil { - return err - } - n.dirty[i] = statsMap.Mutate() - return nil -} - -func (n *NomsStatsDatabase) DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) { - n.mu.Lock() - defer n.mu.Unlock() - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - for _, qual := range quals { - ctx.GetLogger().Debugf("statistics refresh: deleting index statistics: %s/%s", branch, qual) - delete(n.stats[i], qual) - } - } - } -} - -func (n *NomsStatsDatabase) DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error { - n.mu.Lock() - defer n.mu.Unlock() - - ctx.GetLogger().Debugf("statistics refresh: deleting branch statistics: %s", branch) - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - n.branches = append(n.branches[:i], n.branches[i+1:]...) - n.dirty = append(n.dirty[:i], n.dirty[i+1:]...) - n.stats = append(n.stats[:i], n.stats[i+1:]...) - n.tableHashes = append(n.tableHashes[:i], n.tableHashes[i+1:]...) - n.schemaHashes = append(n.schemaHashes[:i], n.schemaHashes[i+1:]...) - } - } - if flush { - return n.destDb.DbData().Ddb.DropStatisics(ctx, branch) - } - return nil -} - -func (n *NomsStatsDatabase) ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error { - n.mu.Lock() - defer n.mu.Unlock() - - var dbStat dbStats - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - // naive merge the new with old - dbStat = n.stats[i] - } - } - - if dbStat == nil { - if err := n.trackBranch(ctx, branch); err != nil { - return err - } - dbStat = n.stats[len(n.branches)-1] - } - - if _, ok := dbStat[qual]; ok { - oldChunks := dbStat[qual].Hist - targetBuckets, err := statspro.MergeNewChunks(targetHashes, oldChunks, newChunks) - if err != nil { - return err - } - newStat, err := dbStat[qual].WithHistogram(targetBuckets) - if err != nil { - return err - } - dbStat[qual] = newStat.(*statspro.DoltStats) - } else { - dbStat[qual] = statspro.NewDoltStats() - } - dbStat[qual].Chunks = targetHashes - dbStat[qual].UpdateActive() - - // let |n.SetStats| update memory and disk - return n.setStat(ctx, branch, qual, dbStat[qual]) -} - -func (n *NomsStatsDatabase) Flush(ctx context.Context, branch string) error { - n.mu.Lock() - defer n.mu.Unlock() - - for i, b := range n.branches { - if strings.EqualFold(b, branch) { - if n.dirty[i] != nil { - flushedMap, err := n.dirty[i].Map(ctx) - if err != nil { - return err - } - n.dirty[i] = nil - if err := n.destDb.DbData().Ddb.SetStatisics(ctx, branch, flushedMap.HashOf()); err != nil { - return err - } - return nil - } - } - } - return nil -} - -func (n *NomsStatsDatabase) GetTableHash(branch, tableName string) hash.Hash { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - return n.tableHashes[i][tableName] - } - } - return hash.Hash{} -} - -func (n *NomsStatsDatabase) SetTableHash(branch, tableName string, h hash.Hash) { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - n.tableHashes[i][tableName] = h - break - } - } -} - -func (n *NomsStatsDatabase) GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) { - n.mu.Lock() - defer n.mu.Unlock() - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - return n.schemaHashes[i][tableName], nil - } - if val, ok, err := n.destDb.DbData().Ddb.GetTuple(ctx, n.schemaTupleKey(branch, tableName)); ok { - if err != nil { - return hash.Hash{}, err - } - h := hash.Parse(string(val)) - n.schemaHashes[i][tableName] = h - return h, nil - } else if err != nil { - return hash.Hash{}, err - } - break - } - return hash.Hash{}, nil -} - -func (n *NomsStatsDatabase) schemaTupleKey(branch, tableName string) string { - return n.sourceDb.Name() + "/" + branch + "/" + tableName -} - -func (n *NomsStatsDatabase) SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error { - n.mu.Lock() - defer n.mu.Unlock() - branchIdx := -1 - for i, b := range n.branches { - if strings.EqualFold(branch, b) { - branchIdx = i - break - } - } - if branchIdx < 0 { - branchIdx = len(n.branches) - if err := n.trackBranch(ctx, branch); err != nil { - return err - } - } - - n.schemaHashes[branchIdx][tableName] = h - key := n.schemaTupleKey(branch, tableName) - if err := n.destDb.DbData().Ddb.DeleteTuple(ctx, key); err != doltdb.ErrTupleNotFound { - return err - } - - return n.destDb.DbData().Ddb.SetTuple(ctx, key, []byte(h.String())) -} diff --git a/go/libraries/doltcore/sqle/statsnoms/iter.go b/go/libraries/doltcore/sqle/statsnoms/iter.go deleted file mode 100644 index 59b9456eed6..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/iter.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/planbuilder" - "gopkg.in/errgo.v2/errors" - - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -var ErrIncompatibleVersion = errors.New("client stats version mismatch") - -func NewStatsIter(ctx *sql.Context, schemaName string, m prolly.Map) (*statsIter, error) { - iter, err := m.IterAll(ctx) - if err != nil { - return nil, err - } - kd, vd := m.Descriptors() - keyBuilder := val.NewTupleBuilder(kd) - valueBuilder := val.NewTupleBuilder(vd) - ns := m.NodeStore() - - return &statsIter{ - iter: iter, - kb: keyBuilder, - vb: valueBuilder, - ns: ns, - schemaName: schemaName, - planb: planbuilder.New(ctx, nil, nil, nil), - }, nil -} - -// statsIter reads histogram buckets into string-compatible types. -// Values that are SQL rows should be converted with statsIter.ParseRow. -// todo: make a JSON compatible container for sql.Row w/ types so that we -// can eagerly convert to sql.Row without sacrificing string printing. -type statsIter struct { - iter prolly.MapIter - kb, vb *val.TupleBuilder - ns tree.NodeStore - planb *planbuilder.Builder - currentQual string - schemaName string - currentTypes []sql.Type -} - -var _ sql.RowIter = (*statsIter)(nil) - -func (s *statsIter) Next(ctx *sql.Context) (sql.Row, error) { - k, v, err := s.iter.Next(ctx) - if err != nil { - return nil, err - } - - // deserialize K, V - version, err := tree.GetField(ctx, s.vb.Desc, 0, v, s.ns) - if err != nil { - return nil, err - } - if version != schema.StatsVersion { - return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion) - } - - var row sql.Row - for i := 0; i < s.kb.Desc.Count(); i++ { - f, err := tree.GetField(ctx, s.kb.Desc, i, k, s.ns) - if err != nil { - return nil, err - } - row = append(row, f) - } - - for i := 0; i < s.vb.Desc.Count(); i++ { - f, err := tree.GetField(ctx, s.vb.Desc, i, v, s.ns) - if err != nil { - return nil, err - } - row = append(row, f) - } - - dbName := row[schema.StatsDbTag].(string) - tableName := row[schema.StatsTableTag].(string) - indexName := row[schema.StatsIndexTag].(string) - position := row[schema.StatsPositionTag].(int64) - _ = row[schema.StatsVersionTag] - commit := hash.Parse(row[schema.StatsCommitHashTag].(string)) - rowCount := row[schema.StatsRowCountTag].(int64) - distinctCount := row[schema.StatsDistinctCountTag].(int64) - nullCount := row[schema.StatsNullCountTag].(int64) - columnsStr := row[schema.StatsColumnsTag].(string) - typesStr := row[schema.StatsTypesTag].(string) - upperBoundStr := row[schema.StatsUpperBoundTag].(string) - upperBoundCnt := row[schema.StatsUpperBoundCntTag].(int64) - createdAt := row[schema.StatsCreatedAtTag].(time.Time) - - typs := strings.Split(typesStr, "\n") - for i, t := range typs { - typs[i] = strings.TrimSpace(t) - } - - qual := sql.NewStatQualifier(dbName, s.schemaName, tableName, indexName) - if curQual := qual.String(); !strings.EqualFold(curQual, s.currentQual) { - s.currentQual = curQual - s.currentTypes, err = parseTypeStrings(typs) - if err != nil { - return nil, err - } - } - - mcvCountsStr := row[schema.StatsMcvCountsTag].(string) - - numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag - mcvs := make([]string, numMcvs) - for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { - if v != nil { - mcvs[i] = v.(string) - } - } - - return sql.Row{ - dbName, - tableName, - indexName, - int(position), - version, - commit.String(), - uint64(rowCount), - uint64(distinctCount), - uint64(nullCount), - columnsStr, - typesStr, - upperBoundStr, - uint64(upperBoundCnt), - createdAt, - mcvs[0], mcvs[1], mcvs[2], mcvs[3], - mcvCountsStr, - }, nil -} - -func (s *statsIter) ParseRow(rowStr string) (sql.Row, error) { - var row sql.Row - for i, v := range strings.Split(rowStr, ",") { - val, _, err := s.currentTypes[i].Convert(v) - if err != nil { - return nil, err - } - row = append(row, val) - } - return row, nil -} - -func (s *statsIter) Close(context *sql.Context) error { - return nil -} diff --git a/go/libraries/doltcore/sqle/statsnoms/load.go b/go/libraries/doltcore/sqle/statsnoms/load.go deleted file mode 100644 index 72051260260..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/load.go +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "errors" - "fmt" - "io" - "strconv" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/planbuilder" - "github.com/dolthub/go-mysql-server/sql/stats" - - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -func loadStats(ctx *sql.Context, db dsess.SqlDatabase, m prolly.Map) (map[sql.StatQualifier]*statspro.DoltStats, error) { - qualToStats := make(map[sql.StatQualifier]*statspro.DoltStats) - schemaName := db.SchemaName() - iter, err := NewStatsIter(ctx, schemaName, m) - if err != nil { - return nil, err - } - currentStat := statspro.NewDoltStats() - invalidTables := make(map[string]bool) - for { - row, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return nil, err - } - - // deserialize K, V - dbName := row[schema.StatsDbTag].(string) - tableName := row[schema.StatsTableTag].(string) - indexName := row[schema.StatsIndexTag].(string) - _ = row[schema.StatsVersionTag] - commit := hash.Parse(row[schema.StatsCommitHashTag].(string)) - rowCount := row[schema.StatsRowCountTag].(uint64) - distinctCount := row[schema.StatsDistinctCountTag].(uint64) - nullCount := row[schema.StatsNullCountTag].(uint64) - columns := strings.Split(row[schema.StatsColumnsTag].(string), ",") - typesStr := row[schema.StatsTypesTag].(string) - boundRowStr := row[schema.StatsUpperBoundTag].(string) - upperBoundCnt := row[schema.StatsUpperBoundCntTag].(uint64) - createdAt := row[schema.StatsCreatedAtTag].(time.Time) - - typs := strings.Split(typesStr, "\n") - for i, t := range typs { - typs[i] = strings.TrimSpace(t) - } - - qual := sql.NewStatQualifier(dbName, schemaName, tableName, indexName) - if _, ok := invalidTables[tableName]; ok { - continue - } - - if currentStat.Statistic.Qual.String() != qual.String() { - if !currentStat.Statistic.Qual.Empty() { - currentStat.UpdateActive() - qualToStats[currentStat.Statistic.Qual] = currentStat - } - - currentStat = statspro.NewDoltStats() - - tab, ok, err := db.GetTableInsensitive(ctx, qual.Table()) - if ok { - currentStat.Statistic.Qual = qual - currentStat.Statistic.Cols = columns - currentStat.Statistic.LowerBnd, currentStat.Tb, currentStat.Statistic.Fds, currentStat.Statistic.Colset, err = loadRefdProps(ctx, db, tab, currentStat.Statistic.Qual, len(currentStat.Columns())) - if err != nil { - return nil, err - } - } else if !ok { - ctx.GetLogger().Debugf("stats load: table previously collected is missing from root: %s", tableName) - invalidTables[qual.Table()] = true - continue - } else if err != nil { - return nil, err - } - } - - numMcvs := schema.StatsMcvCountsTag - schema.StatsMcv1Tag - - mcvCountsStr := strings.Split(row[schema.StatsMcvCountsTag].(string), ",") - mcvCnts := make([]uint64, numMcvs) - for i, v := range mcvCountsStr { - if v == "" { - continue - } - val, err := strconv.Atoi(v) - if err != nil { - return nil, err - } - mcvCnts[i] = uint64(val) - } - - mcvs := make([]sql.Row, numMcvs) - for i, v := range row[schema.StatsMcv1Tag:schema.StatsMcvCountsTag] { - if v != nil && v != "" { - row, err := DecodeRow(ctx, m.NodeStore(), v.(string), currentStat.Tb) - if err != nil { - return nil, err - } - mcvs[i] = row - } - } - - for i, v := range mcvCnts { - if v == 0 { - mcvs = mcvs[:i] - mcvCnts = mcvCnts[:i] - break - } - } - - if currentStat.Statistic.Hist == nil { - currentStat.Statistic.Typs, err = parseTypeStrings(typs) - if err != nil { - return nil, err - } - currentStat.Statistic.Qual = qual - } - - boundRow, err := DecodeRow(ctx, m.NodeStore(), boundRowStr, currentStat.Tb) - if err != nil { - return nil, err - } - - bucket := statspro.DoltBucket{ - Chunk: commit, - Created: createdAt, - Bucket: &stats.Bucket{ - RowCnt: uint64(rowCount), - DistinctCnt: uint64(distinctCount), - NullCnt: uint64(nullCount), - McvVals: mcvs, - McvsCnt: mcvCnts, - BoundCnt: upperBoundCnt, - BoundVal: boundRow, - }, - } - - currentStat.Hist = append(currentStat.Hist, bucket) - currentStat.Statistic.RowCnt += uint64(rowCount) - currentStat.Statistic.DistinctCnt += uint64(distinctCount) - currentStat.Statistic.NullCnt += uint64(rowCount) - if currentStat.Statistic.Created.Before(createdAt) { - currentStat.Statistic.Created = createdAt - } - } - if !currentStat.Qualifier().Empty() { - currentStat.UpdateActive() - qualToStats[currentStat.Statistic.Qual] = currentStat - } - return qualToStats, nil -} - -func parseTypeStrings(typs []string) ([]sql.Type, error) { - var ret []sql.Type - for _, typ := range typs { - ct, err := planbuilder.ParseColumnTypeString(typ) - if err != nil { - return nil, err - } - ret = append(ret, ct) - } - return ret, nil -} - -func loadRefdProps(ctx *sql.Context, db dsess.SqlDatabase, sqlTable sql.Table, qual sql.StatQualifier, cols int) (sql.Row, *val.TupleBuilder, *sql.FuncDepSet, sql.ColSet, error) { - root, err := db.GetRoot(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - iat, ok := sqlTable.(sql.IndexAddressable) - if !ok { - return nil, nil, nil, sql.ColSet{}, nil - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - var sqlIdx sql.Index - for _, i := range indexes { - if strings.EqualFold(i.ID(), qual.Index()) { - sqlIdx = i - break - } - } - - if sqlIdx == nil { - return nil, nil, nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index()) - } - - fds, colset, err := stats.IndexFds(qual.Table(), sqlTable.Schema(), sqlIdx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - table, ok, err := root.GetTable(ctx, doltdb.TableName{Name: sqlTable.Name()}) - if !ok { - return nil, nil, nil, sql.ColSet{}, sql.ErrTableNotFound.New(qual.Table()) - } - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - var idx durable.Index - if qual.Index() == "primary" { - idx, err = table.GetRowData(ctx) - } else { - idx, err = table.GetIndexRowData(ctx, qual.Index()) - } - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - - prollyMap := durable.ProllyMapFromIndex(idx) - keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(cols)) - buffPool := prollyMap.NodeStore().Pool() - - if cnt, err := prollyMap.Count(); err != nil { - return nil, nil, nil, sql.ColSet{}, err - } else if cnt == 0 { - return nil, keyBuilder, nil, sql.ColSet{}, nil - } - firstIter, err := prollyMap.IterOrdinalRange(ctx, 0, 1) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - keyBytes, _, err := firstIter.Next(ctx) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - for i := range keyBuilder.Desc.Types { - keyBuilder.PutRaw(i, keyBytes.GetField(i)) - } - - firstKey := keyBuilder.Build(buffPool) - firstRow := make(sql.Row, keyBuilder.Desc.Count()) - for i := 0; i < keyBuilder.Desc.Count(); i++ { - firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) - if err != nil { - return nil, nil, nil, sql.ColSet{}, err - } - } - return firstRow, keyBuilder, fds, colset, nil -} - -func loadFuncDeps(ctx *sql.Context, db dsess.SqlDatabase, qual sql.StatQualifier) (*sql.FuncDepSet, sql.ColSet, error) { - tab, ok, err := db.GetTableInsensitive(ctx, qual.Table()) - if err != nil { - return nil, sql.ColSet{}, err - } else if !ok { - return nil, sql.ColSet{}, fmt.Errorf("%w: table not found: '%s'", statspro.ErrFailedToLoad, qual.Table()) - } - - iat, ok := tab.(sql.IndexAddressable) - if !ok { - return nil, sql.ColSet{}, fmt.Errorf("%w: table does not have indexes: '%s'", statspro.ErrFailedToLoad, qual.Table()) - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return nil, sql.ColSet{}, err - } - - var idx sql.Index - for _, i := range indexes { - if strings.EqualFold(i.ID(), qual.Index()) { - idx = i - break - } - } - - if idx == nil { - return nil, sql.ColSet{}, fmt.Errorf("%w: index not found: '%s'", statspro.ErrFailedToLoad, qual.Index()) - } - - return stats.IndexFds(qual.Table(), tab.Schema(), idx) -} diff --git a/go/libraries/doltcore/sqle/statsnoms/write.go b/go/libraries/doltcore/sqle/statsnoms/write.go deleted file mode 100644 index c23e1d93dc8..00000000000 --- a/go/libraries/doltcore/sqle/statsnoms/write.go +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statsnoms - -import ( - "context" - "errors" - "io" - "strings" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/stats" - "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/schema" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro" - "github.com/dolthub/dolt/go/store/prolly" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/dolt/go/store/val" -) - -// About ~200 20 byte address fit in a ~4k chunk. Chunk sizes -// are approximate, but certainly shouldn't reach the square -// of the expected size. -const maxBucketFanout = 200 * 200 - -var mcvsTypes = []sql.Type{types.Int64, types.Int64, types.Int64} - -func (n *NomsStatsDatabase) replaceStats(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if err := deleteIndexRows(ctx, statsMap, dStats); err != nil { - return err - } - return putIndexRows(ctx, statsMap, dStats) -} - -func deleteIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if ctx.Err() != nil { - return ctx.Err() - } - sch := schema.StatsTableDoltSchema - kd, _ := sch.GetMapDescriptors() - - keyBuilder := val.NewTupleBuilder(kd) - - qual := dStats.Qualifier() - pool := statsMap.NodeStore().Pool() - - // delete previous entries for this index -> (db, table, index, pos) - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Table()) - keyBuilder.PutString(2, qual.Index()) - keyBuilder.PutInt64(3, 0) - firstKey := keyBuilder.Build(pool) - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Table()) - keyBuilder.PutString(2, qual.Index()) - keyBuilder.PutInt64(3, maxBucketFanout+1) - maxKey := keyBuilder.Build(pool) - - // there is a limit on the number of buckets for a given index, iter - // will terminate before maxBucketFanout - iter, err := statsMap.IterKeyRange(ctx, firstKey, maxKey) - if err != nil { - return err - } - - for { - k, _, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return err - } - err = statsMap.Put(ctx, k, nil) - if err != nil { - return err - } - } - return nil -} - -func putIndexRows(ctx context.Context, statsMap *prolly.MutableMap, dStats *statspro.DoltStats) error { - if ctx.Err() != nil { - return ctx.Err() - } - sch := schema.StatsTableDoltSchema - kd, vd := sch.GetMapDescriptors() - - keyBuilder := val.NewTupleBuilder(kd) - valueBuilder := val.NewTupleBuilder(vd) - - qual := dStats.Qualifier() - pool := statsMap.NodeStore().Pool() - - // now add new buckets - typesB := strings.Builder{} - sep := "" - for _, t := range dStats.Statistic.Typs { - typesB.WriteString(sep + t.String()) - sep = "\n" - } - typesStr := typesB.String() - - var pos int64 - for _, h := range dStats.Hist { - keyBuilder.PutString(0, qual.Database) - keyBuilder.PutString(1, qual.Tab) - keyBuilder.PutString(2, qual.Idx) - keyBuilder.PutInt64(3, pos) - - valueBuilder.PutInt64(0, schema.StatsVersion) - valueBuilder.PutString(1, statspro.DoltBucketChunk(h).String()) - valueBuilder.PutInt64(2, int64(h.RowCount())) - valueBuilder.PutInt64(3, int64(h.DistinctCount())) - valueBuilder.PutInt64(4, int64(h.NullCount())) - valueBuilder.PutString(5, strings.Join(dStats.Columns(), ",")) - valueBuilder.PutString(6, typesStr) - boundRow, err := EncodeRow(ctx, statsMap.NodeStore(), h.UpperBound(), dStats.Tb) - if err != nil { - return err - } - valueBuilder.PutString(7, string(boundRow)) - valueBuilder.PutInt64(8, int64(h.BoundCount())) - valueBuilder.PutDatetime(9, statspro.DoltBucketCreated(h)) - for i, r := range h.Mcvs() { - mcvRow, err := EncodeRow(ctx, statsMap.NodeStore(), r, dStats.Tb) - if err != nil { - return err - } - valueBuilder.PutString(10+i, string(mcvRow)) - } - var mcvCntsRow sql.Row - for _, v := range h.McvCounts() { - mcvCntsRow = append(mcvCntsRow, int(v)) - } - valueBuilder.PutString(14, stats.StringifyKey(mcvCntsRow, mcvsTypes)) - - key := keyBuilder.Build(pool) - value := valueBuilder.Build(pool) - statsMap.Put(ctx, key, value) - pos++ - } - return nil -} - -func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) { - for i, v := range r { - if v == nil { - continue - } - if err := tree.PutField(ctx, ns, tb, i, v); err != nil { - return nil, err - } - } - return tb.Build(ns.Pool()), nil -} - -func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) { - tup := []byte(s) - r := make(sql.Row, tb.Desc.Count()) - var err error - for i, _ := range r { - r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns) - if err != nil { - return nil, err - } - } - return r, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/analyze.go b/go/libraries/doltcore/sqle/statspro/analyze.go deleted file mode 100644 index faa1869315c..00000000000 --- a/go/libraries/doltcore/sqle/statspro/analyze.go +++ /dev/null @@ -1,343 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "fmt" - "strings" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly/tree" -) - -const ( - boostrapRowLimit = 2e6 -) - -func (p *Provider) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error { - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return err - } - return p.RefreshTableStatsWithBranch(ctx, table, db, branch) -} - -func (p *Provider) BootstrapDatabaseStats(ctx *sql.Context, db string) error { - dSess := dsess.DSessFromSess(ctx.Session) - branches := p.getStatsBranches(ctx) - var rows uint64 - for _, branch := range branches { - sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch)) - if err != nil { - if sql.ErrDatabaseNotFound.Is(err) { - // default branch is not valid - continue - } - return err - } - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - for _, table := range tables { - sqlTable, _, err := GetLatestTable(ctx, table, sqlDb) - if err != nil { - return err - } - - if st, ok := sqlTable.(sql.StatisticsTable); ok { - cnt, ok, err := st.RowCount(ctx) - if ok && err == nil { - rows += cnt - } - } - if rows >= boostrapRowLimit { - return fmt.Errorf("stats bootstrap aborted because %s exceeds the default row limit; manually run \"ANALYZE \" or \"call dolt_stats_restart()\" to collect statistics", db) - } - - if err := p.RefreshTableStatsWithBranch(ctx, sqlTable, db, branch); err != nil { - return err - } - } - } - return nil -} - -func (p *Provider) RefreshTableStatsWithBranch(ctx *sql.Context, table sql.Table, db string, branch string) error { - if !p.TryLockForUpdate(branch, db, table.Name()) { - return fmt.Errorf("already updating statistics") - } - defer p.UnlockTable(branch, db, table.Name()) - - dSess := dsess.DSessFromSess(ctx.Session) - - sqlDb, err := dSess.Provider().Database(ctx, BranchQualifiedDatabase(db, branch)) - if err != nil { - return err - } - - // lock only after accessing DatabaseProvider - - tableName := strings.ToLower(table.Name()) - dbName := strings.ToLower(db) - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - iat, ok := table.(sql.IndexAddressableTable) - if !ok { - return nil - } - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return err - } - - // it's important to update WORKING session references every call - sqlTable, dTab, err := GetLatestTable(ctx, tableName, sqlDb) - if err != nil { - return err - } - - statDb, ok := p.getStatDb(dbName) - if !ok { - // if the stats database does not exist, initialize one - fs, err := p.pro.FileSystemForDatabase(dbName) - if err != nil { - return err - } - sourceDb, ok := p.pro.BaseDatabase(ctx, dbName) - if !ok { - return sql.ErrDatabaseNotFound.New(dbName) - } - statDb, err = p.sf.Init(ctx, sourceDb, p.pro, fs, env.GetCurrentUserHomeDir) - if err != nil { - ctx.Warn(0, err.Error()) - return nil - } - p.setStatDb(dbName, statDb) - } - - schHash, err := dTab.GetSchemaHash(ctx) - if err != nil { - return err - } - - if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, tableName); oldSchHash.IsEmpty() { - if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil { - return fmt.Errorf("set schema hash error: %w", err) - } - } else if oldSchHash != schHash { - ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch) - if err := statDb.SetSchemaHash(ctx, branch, tableName, schHash); err != nil { - return err - } - - stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, tableName) - if err != nil { - return err - } - for _, stat := range stats { - statDb.DeleteStats(ctx, branch, stat.Qualifier()) - } - } else if err != nil { - return err - } - - tablePrefix := fmt.Sprintf("%s.", tableName) - var idxMetas []indexMeta - for _, idx := range indexes { - cols := make([]string, len(idx.Expressions())) - for i, c := range idx.Expressions() { - cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) - } - - qual := sql.NewStatQualifier(db, schemaName, table.Name(), strings.ToLower(idx.ID())) - curStat, ok := statDb.GetStat(branch, qual) - if !ok { - curStat = NewDoltStats() - curStat.Statistic.Qual = qual - } - idxMeta, err := newIdxMeta(ctx, curStat, dTab, idx, cols) - if err != nil { - return err - } - idxMetas = append(idxMetas, idxMeta) - } - - newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) - if err != nil { - return err - } - - // merge new chunks with preexisting chunks - for _, idxMeta := range idxMetas { - stat := newTableStats[idxMeta.qual] - targetChunks, err := MergeNewChunks(idxMeta.allAddrs, idxMeta.keepChunks, stat.Hist) - if err != nil { - return err - } - if targetChunks == nil { - // empty table - continue - } - stat.SetChunks(idxMeta.allAddrs) - stat.Hist = targetChunks - stat.UpdateActive() - if err := statDb.SetStat(ctx, branch, idxMeta.qual, stat); err != nil { - return err - } - } - - p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName)) - return statDb.Flush(ctx, branch) -} - -// BranchQualifiedDatabase returns a branch qualified database. If the database -// is already branch suffixed no duplication is applied. -func BranchQualifiedDatabase(db, branch string) string { - suffix := fmt.Sprintf("/%s", branch) - if !strings.HasSuffix(db, suffix) { - return fmt.Sprintf("%s%s", db, suffix) - } - return db -} - -// GetLatestTable will get the WORKING root table for the current database/branch -func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (sql.Table, *doltdb.Table, error) { - var db sqle.Database - switch d := sqlDb.(type) { - case sqle.Database: - db = d - case sqle.ReadReplicaDatabase: - db = d.Database - default: - return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb) - } - sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName) - if err != nil { - return nil, nil, err - } - if !ok { - return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName) - } - - var dTab *doltdb.Table - switch t := sqlTable.(type) { - case *sqle.AlterableDoltTable: - dTab, err = t.DoltTable.DoltTable(ctx) - case *sqle.WritableDoltTable: - dTab, err = t.DoltTable.DoltTable(ctx) - case *sqle.DoltTable: - dTab, err = t.DoltTable(ctx) - default: - err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable) - } - if err != nil { - return nil, nil, err - } - return sqlTable, dTab, nil -} - -func newIdxMeta(ctx *sql.Context, curStats *DoltStats, doltTable *doltdb.Table, sqlIndex sql.Index, cols []string) (indexMeta, error) { - var idx durable.Index - var err error - if strings.EqualFold(sqlIndex.ID(), "PRIMARY") { - idx, err = doltTable.GetRowData(ctx) - } else { - idx, err = doltTable.GetIndexRowData(ctx, sqlIndex.ID()) - } - if err != nil { - return indexMeta{}, err - } - - prollyMap := durable.ProllyMapFromIndex(idx) - - if cnt, err := prollyMap.Count(); err != nil { - return indexMeta{}, err - } else if cnt == 0 { - return indexMeta{ - qual: curStats.Statistic.Qual, - cols: cols, - }, nil - } - - // get newest histogram target level hashes - levelNodes, err := tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt) - if err != nil { - return indexMeta{}, err - } - - var addrs []hash.Hash - var keepChunks []sql.HistogramBucket - var missingAddrs float64 - var missingChunks []tree.Node - var missingOffsets []updateOrdinal - var offset uint64 - - for _, n := range levelNodes { - // Compare the previous histogram chunks to the newest tree chunks. - // Partition the newest chunks into 1) preserved or 2) missing. - // Missing chunks will need to be scanned on a stats update, so - // track the (start, end) ordinal offsets to simplify the read iter. - treeCnt, err := n.TreeCount() - if err != nil { - return indexMeta{}, err - } - - addrs = append(addrs, n.HashOf()) - if bucketIdx, ok := curStats.Active[n.HashOf()]; !ok { - missingChunks = append(missingChunks, n) - missingOffsets = append(missingOffsets, updateOrdinal{offset, offset + uint64(treeCnt)}) - missingAddrs++ - } else { - keepChunks = append(keepChunks, curStats.Hist[bucketIdx]) - } - offset += uint64(treeCnt) - } - - var dropChunks []sql.HistogramBucket - for _, h := range curStats.Chunks { - var match bool - for _, b := range keepChunks { - if DoltBucketChunk(b) == h { - match = true - break - } - } - if !match { - dropChunks = append(dropChunks, curStats.Hist[curStats.Active[h]]) - } - } - - return indexMeta{ - qual: curStats.Statistic.Qual, - cols: cols, - newNodes: missingChunks, - updateOrdinals: missingOffsets, - keepChunks: keepChunks, - dropChunks: dropChunks, - allAddrs: addrs, - }, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/auto_refresh.go b/go/libraries/doltcore/sqle/statspro/auto_refresh.go deleted file mode 100644 index 3322065f809..00000000000 --- a/go/libraries/doltcore/sqle/statspro/auto_refresh.go +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - types2 "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" -) - -const asyncAutoRefreshStats = "async_auto_refresh_stats" - -func (p *Provider) InitAutoRefresh(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads) error { - _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold) - _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval) - interval64, _, _ := types2.Int64.Convert(interval) - intervalSec := time.Second * time.Duration(interval64.(int64)) - thresholdf64 := threshold.(float64) - - ctx, err := ctxFactory(context.Background()) - if err != nil { - return err - } - - branches := p.getStatsBranches(ctx) - - return p.InitAutoRefreshWithParams(ctxFactory, dbName, bThreads, intervalSec, thresholdf64, branches) -} - -func (p *Provider) InitAutoRefreshWithParams(ctxFactory func(ctx context.Context) (*sql.Context, error), dbName string, bThreads *sql.BackgroundThreads, checkInterval time.Duration, updateThresh float64, branches []string) error { - // this is only called after initial statistics are finished loading - // launch a thread that periodically checks freshness - - p.mu.Lock() - defer p.mu.Unlock() - - dropDbCtx, dbStatsCancel := context.WithCancel(context.Background()) - p.autoCtxCancelers[dbName] = dbStatsCancel - - return bThreads.Add(fmt.Sprintf("%s_%s", asyncAutoRefreshStats, dbName), func(ctx context.Context) { - ticker := time.NewTicker(checkInterval + time.Nanosecond) - for { - select { - case <-ctx.Done(): - ticker.Stop() - return - case <-ticker.C: - select { - case <-dropDbCtx.Done(): - ticker.Stop() - return - default: - } - - sqlCtx, err := ctxFactory(ctx) - if err != nil { - return - } - - dSess := dsess.DSessFromSess(sqlCtx.Session) - ddb, ok := dSess.GetDoltDB(sqlCtx, dbName) - if !ok { - sqlCtx.GetLogger().Debugf("statistics refresh error: database not found %s", dbName) - return - } - for _, branch := range branches { - if br, ok, err := ddb.HasBranch(ctx, branch); ok { - sqlCtx.GetLogger().Debugf("starting statistics refresh check for '%s': %s", dbName, time.Now().String()) - // update WORKING session references - sqlDb, err := dSess.Provider().Database(sqlCtx, BranchQualifiedDatabase(dbName, branch)) - if err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - return - } - - if err := p.checkRefresh(sqlCtx, sqlDb, dbName, br, updateThresh); err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - return - } - } else if err != nil { - sqlCtx.GetLogger().Debugf("statistics refresh error: branch check error %s", err.Error()) - } else { - sqlCtx.GetLogger().Debugf("statistics refresh error: branch not found %s", br) - } - } - } - } - }) -} - -func (p *Provider) checkRefresh(ctx *sql.Context, sqlDb sql.Database, dbName, branch string, updateThresh float64) error { - if !p.TryLockForUpdate(branch, dbName, "") { - return fmt.Errorf("database already being updated: %s/%s", branch, dbName) - } - defer p.UnlockTable(branch, dbName, "") - - // Iterate all dbs, tables, indexes. Each db will collect - // []indexMeta above refresh threshold. We read and process those - // chunks' statistics. We merge updated chunks with precomputed - // chunks. The full set of statistics for each database lands - // 1) in the provider's most recent set of database statistics, and - // 2) on disk in the database's statistics ref'd prolly.Map. - statDb, ok := p.getStatDb(dbName) - if !ok { - return sql.ErrDatabaseNotFound.New(dbName) - } - - var deletedStats []sql.StatQualifier - qualExists := make(map[sql.StatQualifier]bool) - tableExistsAndSkipped := make(map[string]bool) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - for _, table := range tables { - if !p.TryLockForUpdate(branch, dbName, table) { - ctx.GetLogger().Debugf("statistics refresh: table is already being updated: %s/%s.%s", branch, dbName, table) - return fmt.Errorf("table already being updated: %s", table) - } - defer p.UnlockTable(branch, dbName, table) - - sqlTable, dTab, err := GetLatestTable(ctx, table, sqlDb) - if err != nil { - return err - } - - tableHash, err := dTab.GetRowDataHash(ctx) - if err != nil { - return err - } - - if statDb.GetTableHash(branch, table) == tableHash { - // no data changes since last check - tableExistsAndSkipped[table] = true - ctx.GetLogger().Debugf("statistics refresh: table hash unchanged since last check: %s", tableHash) - continue - } else { - ctx.GetLogger().Debugf("statistics refresh: new table hash: %s", tableHash) - } - - schHash, err := dTab.GetSchemaHash(ctx) - if err != nil { - return err - } - - var schemaName string - if schTab, ok := sqlTable.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - if oldSchHash, err := statDb.GetSchemaHash(ctx, branch, table); oldSchHash.IsEmpty() { - if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil { - return err - } - } else if oldSchHash != schHash { - ctx.GetLogger().Debugf("statistics refresh: detected table schema change: %s,%s/%s", dbName, table, branch) - if err := statDb.SetSchemaHash(ctx, branch, table, schHash); err != nil { - return err - } - stats, err := p.GetTableDoltStats(ctx, branch, dbName, schemaName, table) - if err != nil { - return err - } - for _, stat := range stats { - statDb.DeleteStats(ctx, branch, stat.Qualifier()) - } - } else if err != nil { - return err - } - - iat, ok := sqlTable.(sql.IndexAddressableTable) - if !ok { - return fmt.Errorf("table does not support indexes %s", table) - } - - indexes, err := iat.GetIndexes(ctx) - if err != nil { - return err - } - - // collect indexes and ranges to be updated - var idxMetas []indexMeta - for _, index := range indexes { - qual := sql.NewStatQualifier(dbName, schemaName, table, strings.ToLower(index.ID())) - qualExists[qual] = true - curStat, ok := statDb.GetStat(branch, qual) - if !ok { - curStat = NewDoltStats() - curStat.Statistic.Qual = qual - - cols := make([]string, len(index.Expressions())) - tablePrefix := fmt.Sprintf("%s.", table) - for i, c := range index.Expressions() { - cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) - } - curStat.Statistic.Cols = cols - } - ctx.GetLogger().Debugf("statistics refresh index: %s", qual.String()) - - updateMeta, err := newIdxMeta(ctx, curStat, dTab, index, curStat.Columns()) - if err != nil { - ctx.GetLogger().Debugf("statistics refresh error: %s", err.Error()) - continue - } - curCnt := float64(len(curStat.Active)) - updateCnt := float64(len(updateMeta.newNodes)) - deleteCnt := float64(len(curStat.Active) - len(updateMeta.keepChunks)) - ctx.GetLogger().Debugf("statistics current: %d, new: %d, delete: %d", int(curCnt), int(updateCnt), int(deleteCnt)) - - if curCnt == 0 || (deleteCnt+updateCnt)/curCnt > updateThresh { - if curCnt == 0 && updateCnt == 0 { - continue - } - ctx.GetLogger().Debugf("statistics updating: %s", updateMeta.qual) - // mark index for updating - idxMetas = append(idxMetas, updateMeta) - // update latest hash if we haven't already - statDb.SetTableHash(branch, table, tableHash) - } - } - - // get new buckets for index chunks to update - newTableStats, err := createNewStatsBuckets(ctx, sqlTable, dTab, indexes, idxMetas) - if err != nil { - return err - } - - // merge new chunks with preexisting chunks - for _, updateMeta := range idxMetas { - stat := newTableStats[updateMeta.qual] - if stat != nil { - var err error - if _, ok := statDb.GetStat(branch, updateMeta.qual); !ok { - err = statDb.SetStat(ctx, branch, updateMeta.qual, stat) - } else { - err = statDb.ReplaceChunks(ctx, branch, updateMeta.qual, updateMeta.allAddrs, updateMeta.dropChunks, stat.Hist) - } - if err != nil { - return err - } - p.UpdateStatus(dbName, fmt.Sprintf("refreshed %s", dbName)) - } - } - } - - for _, q := range statDb.ListStatQuals(branch) { - // table or index delete leaves hole in stats - // this is separate from threshold check - if !tableExistsAndSkipped[q.Table()] && !qualExists[q] { - // only delete stats we've verified are deleted - deletedStats = append(deletedStats, q) - } - } - - statDb.DeleteStats(ctx, branch, deletedStats...) - - if err := statDb.Flush(ctx, branch); err != nil { - return err - } - - return nil -} diff --git a/go/libraries/doltcore/sqle/statspro/update.go b/go/libraries/doltcore/sqle/statspro/bucket_builder.go similarity index 52% rename from go/libraries/doltcore/sqle/statspro/update.go rename to go/libraries/doltcore/sqle/statspro/bucket_builder.go index 562e82c5679..2c974223f84 100644 --- a/go/libraries/doltcore/sqle/statspro/update.go +++ b/go/libraries/doltcore/sqle/statspro/bucket_builder.go @@ -17,19 +17,11 @@ package statspro import ( "container/heap" "context" - "errors" - "fmt" - "io" "sort" - "strings" - "time" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/stats" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" - "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/prolly" "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/val" @@ -40,153 +32,7 @@ const ( mcvCnt = 3 ) -// createNewStatsBuckets builds histograms for a list of index statistic metadata. -// We only read chunk ranges indicated by |indexMeta.updateOrdinals|. If -// the returned buckets are a subset of the index the caller is responsible -// for reconciling the difference. -func createNewStatsBuckets(ctx *sql.Context, sqlTable sql.Table, dTab *doltdb.Table, indexes []sql.Index, idxMetas []indexMeta) (map[sql.StatQualifier]*DoltStats, error) { - nameToIdx := make(map[string]sql.Index) - for _, idx := range indexes { - nameToIdx[strings.ToLower(idx.ID())] = idx - } - - ret := make(map[sql.StatQualifier]*DoltStats) - - for _, meta := range idxMetas { - var idx durable.Index - var err error - if strings.EqualFold(meta.qual.Index(), "PRIMARY") { - idx, err = dTab.GetRowData(ctx) - } else { - idx, err = dTab.GetIndexRowData(ctx, meta.qual.Index()) - } - if err != nil { - return nil, err - } - - prollyMap := durable.ProllyMapFromIndex(idx) - keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc()) - - sqlIdx := nameToIdx[strings.ToLower(meta.qual.Index())] - fds, colSet, err := stats.IndexFds(meta.qual.Table(), sqlTable.Schema(), sqlIdx) - if err != nil { - return nil, err - } - - var types []sql.Type - for _, cet := range nameToIdx[strings.ToLower(meta.qual.Index())].ColumnExpressionTypes() { - types = append(types, cet.Type) - } - - if cnt, err := prollyMap.Count(); err != nil { - return nil, err - } else if cnt == 0 { - // table is empty - ret[meta.qual] = NewDoltStats() - ret[meta.qual].Statistic.Created = time.Now() - ret[meta.qual].Statistic.Cols = meta.cols - ret[meta.qual].Statistic.Typs = types - ret[meta.qual].Statistic.Qual = meta.qual - - ret[meta.qual].Statistic.Fds = fds - ret[meta.qual].Statistic.Colset = colSet - ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols))) - - continue - } - - firstRow, err := firstRowForIndex(ctx, prollyMap, keyBuilder, len(meta.cols)) - if err != nil { - return nil, err - } - - updater := newBucketBuilder(meta.qual, len(meta.cols), prollyMap.KeyDesc()) - ret[meta.qual] = NewDoltStats() - ret[meta.qual].Chunks = meta.allAddrs - ret[meta.qual].Statistic.Created = time.Now() - ret[meta.qual].Statistic.Cols = meta.cols - ret[meta.qual].Statistic.Typs = types - ret[meta.qual].Statistic.Qual = meta.qual - ret[meta.qual].Tb = val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(len(meta.cols))) - - var start, stop uint64 - // read leaf rows for each bucket - for i, chunk := range meta.newNodes { - // each node is a bucket - updater.newBucket() - - // we read exclusive range [node first key, next node first key) - start, stop = meta.updateOrdinals[i].start, meta.updateOrdinals[i].stop - iter, err := prollyMap.IterOrdinalRange(ctx, start, stop) - if err != nil { - return nil, err - } - for { - // stats key will be a prefix of the index key - keyBytes, _, err := iter.Next(ctx) - if errors.Is(err, io.EOF) { - break - } else if err != nil { - return nil, err - } - // build full key - for i := range keyBuilder.Desc.Types { - keyBuilder.PutRaw(i, keyBytes.GetField(i)) - } - - updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen)) - keyBuilder.Recycle() - } - - // finalize the aggregation - bucket, err := updater.finalize(ctx, prollyMap.NodeStore()) - if err != nil { - return nil, err - } - bucket.Chunk = chunk.HashOf() - ret[updater.qual].Hist = append(ret[updater.qual].Hist, bucket) - } - - ret[updater.qual].Statistic.DistinctCnt = uint64(updater.globalDistinct) - ret[updater.qual].Statistic.RowCnt = uint64(updater.globalCount) - ret[updater.qual].Statistic.LowerBnd = firstRow - ret[updater.qual].Statistic.Fds = fds - ret[updater.qual].Statistic.Colset = colSet - ret[updater.qual].UpdateActive() - } - return ret, nil -} - -// MergeNewChunks combines a set of old and new chunks to create -// the desired target histogram. Undefined behavior if a |targetHash| -// does not exist in either |oldChunks| or |newChunks|. -func MergeNewChunks(inputHashes []hash.Hash, oldChunks, newChunks []sql.HistogramBucket) ([]sql.HistogramBucket, error) { - hashToPos := make(map[hash.Hash]int, len(inputHashes)) - for i, h := range inputHashes { - hashToPos[h] = i - } - - var cnt int - targetBuckets := make([]sql.HistogramBucket, len(inputHashes)) - for _, c := range oldChunks { - if idx, ok := hashToPos[DoltBucketChunk(c)]; ok { - cnt++ - targetBuckets[idx] = c - } - } - for _, c := range newChunks { - if idx, ok := hashToPos[DoltBucketChunk(c)]; ok && targetBuckets[idx] == nil { - cnt++ - targetBuckets[idx] = c - } - } - if cnt != len(inputHashes) { - return nil, fmt.Errorf("encountered invalid statistic chunks") - } - return targetBuckets, nil -} - -func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder, prefixLen int) (sql.Row, error) { +func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.TupleBuilder) (sql.Row, error) { if cnt, err := prollyMap.Count(); err != nil { return nil, err } else if cnt == 0 { @@ -208,9 +54,9 @@ func firstRowForIndex(ctx *sql.Context, prollyMap prolly.Map, keyBuilder *val.Tu keyBuilder.PutRaw(i, keyBytes.GetField(i)) } - firstKey := keyBuilder.BuildPrefixNoRecycle(buffPool, prefixLen) - firstRow := make(sql.Row, prefixLen) - for i := 0; i < prefixLen; i++ { + firstKey := keyBuilder.Build(buffPool) + firstRow := make(sql.Row, firstKey.Count()) + for i := range firstRow { firstRow[i], err = tree.GetField(ctx, prollyMap.KeyDesc(), i, firstKey, prollyMap.NodeStore()) if err != nil { return nil, err @@ -266,7 +112,7 @@ func (u *bucketBuilder) newBucket() { // finalize converts the current aggregation stats into a histogram bucket, // which includes deserializing most common value tuples into sql.Rows. -func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBucket, error) { +func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (*stats.Bucket, error) { // update MCV in case we've ended on a run of many identical keys u.updateMcv() @@ -276,27 +122,25 @@ func (u *bucketBuilder) finalize(ctx context.Context, ns tree.NodeStore) (DoltBu // convert the MCV tuples into SQL rows (most efficient to only do this once) mcvRows, err := u.mcvs.Values(ctx, u.tupleDesc, ns, u.prefixLen) if err != nil { - return DoltBucket{}, err + return nil, err } upperBound := make(sql.Row, u.prefixLen) if u.currentKey != nil { for i := 0; i < u.prefixLen; i++ { upperBound[i], err = tree.GetField(ctx, u.tupleDesc, i, u.currentKey, ns) if err != nil { - return DoltBucket{}, err + return nil, err } } } - return DoltBucket{ - Bucket: &stats.Bucket{ - RowCnt: uint64(u.count), - DistinctCnt: uint64(u.distinct), - BoundCnt: uint64(u.currentCnt), - McvVals: mcvRows, - McvsCnt: u.mcvs.Counts(), - BoundVal: upperBound, - NullCnt: uint64(u.nulls), - }, + return &stats.Bucket{ + RowCnt: uint64(u.count), + DistinctCnt: uint64(u.distinct), + BoundCnt: uint64(u.currentCnt), + McvVals: mcvRows, + McvsCnt: u.mcvs.Counts(), + BoundVal: upperBound, + NullCnt: uint64(u.nulls), }, nil } diff --git a/go/libraries/doltcore/sqle/statspro/update_test.go b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go similarity index 92% rename from go/libraries/doltcore/sqle/statspro/update_test.go rename to go/libraries/doltcore/sqle/statspro/bucket_builder_test.go index ef670e19c8b..e97ad343755 100644 --- a/go/libraries/doltcore/sqle/statspro/update_test.go +++ b/go/libraries/doltcore/sqle/statspro/bucket_builder_test.go @@ -61,27 +61,27 @@ func TestBucketBuilder(t *testing.T) { name string keys []sql.Row keyDesc val.TupleDesc - bucket DoltBucket + bucket *stats.Bucket }{ { name: "ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 5, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { // technically nulls should be at beginning name: "ints with middle nulls", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {nil}, {nil}, {nil}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 16, DistinctCnt: 6, NullCnt: 3, @@ -89,13 +89,13 @@ func TestBucketBuilder(t *testing.T) { McvsCnt: []uint64{}, BoundVal: sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { name: "ints with beginning nulls", keys: []sql.Row{{nil}, {nil}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 6, NullCnt: 2, @@ -103,86 +103,86 @@ func TestBucketBuilder(t *testing.T) { McvsCnt: []uint64{}, BoundVal: sql.Row{int64(5)}, BoundCnt: 2, - }}, + }, }, { name: "more ints", keys: []sql.Row{{1}, {1}, {1}, {2}, {2}, {2}, {2}, {3}, {3}, {3}, {4}, {4}, {4}, {5}, {5}, {5}, {5}, {6}, {6}, {6}, {6}, {7}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 22, DistinctCnt: 7, BoundCnt: 1, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(7)}, - }}, + }, }, { name: "2-ints", keys: []sql.Row{{1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 3}, {2, 3}, {3, 1}, {3, 2}, {3, 3}, {4, 1}, {4, 1}, {4, 1}, {5, 1}, {5, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 11, McvVals: []sql.Row{{int64(4), int64(1)}}, McvsCnt: []uint64{3}, BoundVal: sql.Row{int64(5), int64(2)}, BoundCnt: 1, - }}, + }, }, { name: "2-ints with nulls", keys: []sql.Row{{nil, 1}, {1, nil}, {1, 2}, {2, nil}, {2, 2}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: true}, val.Type{Enc: val.Int64Enc, Nullable: true}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 5, DistinctCnt: 5, NullCnt: 3, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{int64(2), int64(2)}, - BoundCnt: 1}, + BoundCnt: 1, }, }, { name: "varchars", keys: []sql.Row{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"e"}, {"f"}, {"g"}, {"g"}, {"g"}, {"h"}, {"h"}, {"h"}, {"i"}, {"i"}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 9, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{"i"}, BoundCnt: 2, - }}, + }, }, { name: "varchar-ints", keys: []sql.Row{{"a", 1}, {"b", 1}, {"c", 1}, {"d", 1}, {"e", 1}, {"e", 2}, {"f", 1}, {"g", 1}, {"g", 2}, {"g", 2}, {"h", 1}, {"h", 1}, {"h", 2}, {"i", 1}, {"i", 1}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}, val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 15, DistinctCnt: 12, McvVals: []sql.Row{}, McvsCnt: []uint64{}, BoundVal: sql.Row{"i", int64(1)}, BoundCnt: 2, - }}, + }, }, { name: "mcvs", keys: []sql.Row{{1}, {2}, {3}, {4}, {5}, {6}, {7}, {7}, {7}, {7}, {8}, {9}, {10}, {10}, {10}, {11}, {12}, {13}, {14}, {15}, {20}, {21}, {22}}, keyDesc: val.NewTupleDescriptor(val.Type{Enc: val.Int64Enc, Nullable: false}), - bucket: DoltBucket{Bucket: &stats.Bucket{ + bucket: &stats.Bucket{ RowCnt: 23, DistinctCnt: 18, McvVals: []sql.Row{{int64(10)}, {int64(7)}}, McvsCnt: []uint64{3, 4}, BoundVal: sql.Row{int64(22)}, BoundCnt: 1, - }}, + }, }, } diff --git a/go/libraries/doltcore/sqle/statspro/configure.go b/go/libraries/doltcore/sqle/statspro/configure.go deleted file mode 100644 index f8492a08b61..00000000000 --- a/go/libraries/doltcore/sqle/statspro/configure.go +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/dolthub/go-mysql-server/sql" - types2 "github.com/dolthub/go-mysql-server/sql/types" - - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/utils/filesys" -) - -var helpMsg = "call dolt_stats_purge() to reset statistics" - -func (p *Provider) Configure(ctx context.Context, ctxFactory func(ctx context.Context) (*sql.Context, error), bThreads *sql.BackgroundThreads, dbs []dsess.SqlDatabase) error { - p.SetStarter(NewStatsInitDatabaseHook(p, ctxFactory, bThreads)) - - if _, disabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsMemoryOnly); disabled == int8(1) { - return nil - } - - loadCtx, err := ctxFactory(ctx) - if err != nil { - return err - } - - branches := p.getStatsBranches(loadCtx) - - var autoEnabled bool - var startupEnabled bool - var intervalSec time.Duration - var thresholdf64 float64 - if _, enabled, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshEnabled); enabled == int8(1) { - autoEnabled = true - _, threshold, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshThreshold) - _, interval, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsAutoRefreshInterval) - interval64, _, _ := types2.Int64.Convert(interval) - intervalSec = time.Second * time.Duration(interval64.(int64)) - thresholdf64 = threshold.(float64) - - p.pro.InitDatabaseHooks = append(p.pro.InitDatabaseHooks, NewStatsInitDatabaseHook(p, ctxFactory, bThreads)) - p.pro.DropDatabaseHooks = append([]sqle.DropDatabaseHook{NewStatsDropDatabaseHook(p)}, p.pro.DropDatabaseHooks...) - } else if _, startupStats, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBootstrapEnabled); startupStats == int8(1) { - startupEnabled = true - } - - eg, ctx := loadCtx.NewErrgroup() - for _, db := range dbs { - // copy closure variables - db := db - eg.Go(func() (err error) { - defer func() { - if r := recover(); r != nil { - if str, ok := r.(fmt.Stringer); ok { - err = fmt.Errorf("%w: %s", ErrFailedToLoad, str.String()) - } else { - err = fmt.Errorf("%w: %v", ErrFailedToLoad, r) - } - return - } - }() - - fs, err := p.pro.FileSystemForDatabase(db.Name()) - if err != nil { - return err - } - - if p.Load(loadCtx, fs, db, branches); err != nil { - return err - } - if autoEnabled { - return p.InitAutoRefreshWithParams(ctxFactory, db.Name(), bThreads, intervalSec, thresholdf64, branches) - } else if startupEnabled { - if err := p.BootstrapDatabaseStats(loadCtx, db.Name()); err != nil { - return err - } - } - return nil - }) - } - return eg.Wait() -} - -// getStatsBranches returns the set of branches whose statistics are tracked. -// The order of precedence is (1) global variable, (2) session current branch, -// (3) engine default branch. -func (p *Provider) getStatsBranches(ctx *sql.Context) []string { - dSess := dsess.DSessFromSess(ctx.Session) - var branches []string - if _, bs, _ := sql.SystemVariables.GetGlobal(dsess.DoltStatsBranches); bs == "" { - defaultBranch, _ := dSess.GetBranch() - if defaultBranch != "" { - branches = append(branches, defaultBranch) - } - } else { - for _, branch := range strings.Split(bs.(string), ",") { - branches = append(branches, strings.TrimSpace(branch)) - } - } - - if branches == nil { - branches = append(branches, p.pro.DefaultBranch()) - } - return branches -} - -func (p *Provider) LoadStats(ctx *sql.Context, db, branch string) error { - if statDb, ok := p.getStatDb(db); ok { - return statDb.LoadBranchStats(ctx, branch) - } - return nil -} - -// Load scans the statistics tables, populating the |stats| attribute. -// Statistics are not available for reading until we've finished loading. -func (p *Provider) Load(ctx *sql.Context, fs filesys.Filesys, db dsess.SqlDatabase, branches []string) { - // |statPath| is either file://./stat or mem://stat - statsDb, err := p.sf.Init(ctx, db, p.pro, fs, env.GetCurrentUserHomeDir) - if err != nil { - ctx.GetLogger().Errorf("initialize stats failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - return - } - - for _, branch := range branches { - if err = statsDb.LoadBranchStats(ctx, branch); err != nil { - // if branch name is invalid, continue loading rest - // TODO: differentiate bad branch name from other errors - ctx.GetLogger().Errorf("load stats init failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - continue - } - if err := statsDb.Flush(ctx, branch); err != nil { - ctx.GetLogger().Errorf("load stats flush failure for %s: %s; %s\n", db.Name(), err.Error(), helpMsg) - continue - } - } - - p.setStatDb(strings.ToLower(db.Name()), statsDb) - return -} diff --git a/go/libraries/doltcore/sqle/statspro/doc.go b/go/libraries/doltcore/sqle/statspro/doc.go new file mode 100644 index 00000000000..51c1cdbbd0b --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/doc.go @@ -0,0 +1,81 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +// Package statspro provides an event loop that manages table statistics +// management and access. +// +// At any given time there is one thread responsible for pulling work +// from the job queue to execute. The thread has exclusive ownership +// over the job channel. +// +// All stats are persisted within a single database. If there are multiple +// databases, one is selected by random as the storage target. If during +// initialization multiple databases have stats, one will be chosen by +// random as the target. If a database changes between server restarts, +// the storage stats will be useless but not impair regular operations because +// storage is only ever a best-effort content-addressed persistence layer; +// buckets will be regenerated if they are missing. If the database acting +// as a storage target is deleted, we swap the cache to write to a new storage +// target that still exists. +// +// The main data structures: +// - Table statistics map, that returns a list of table index statistics +// for a specific branch, database, and table name. +// - Object caches: +// - Bucket cache: Chunk addressed hash map. All provider histogram +// references point to objects in the bucket cache. Backed by a +// best-effort on-disk prolly.Map to make restarts faster. +// - Template cache: Table-schema/index addressed stats.Statistics object +// for a specific index. +// - Bound cache: Chunk addressed first row for an index histogram. +// +// Work is broken down into: +// - A basic update cycle of (1) seed database tables, (2) create or pull +// buckets from disk, (3) commit statistics accessed by the provider. +// - GC cycle: Mark and sweep the most recent context's active set into +// new cache/prolly.Map objects. +// - Branch sync: Update the tracked set of branch-qualified databases. +// +// Regular jobs, GC, and branch-sync are all controlled by tickers at the +// top level that controls that maximum rate of calling each. GC and +// branch-sync are prioritized before jobs, and therefore rate-limited to +// allow the job queue to flush in-between calls. +// +// DDL operations and branch create/delete are concurrent to the event +// loop. We require an extra fixed-sized queue as an intermediary to the +// job queue to protect the main thread's ownership. DDL acquiring the +// provider lock is a deadlock risk -- we cannot do any provider checks +// while holding the db lock. And lastly, the way update jobs are split +// up over time means we need to do special checks when finalizing a set +// of database stats. A race between deleting a database and finalizing +// statistics needs to end with no statistics, which requires a delete check +// for when finalize wins a race. +// +// The stats lifecycle can be controlled with: +// - dolt_stats_stop: clear queue and disable thread +// - dolt_stats_restart: clear queue, refresh queue, start thread +// - dolt_stats_purge: clear queue, refresh queue, clear cache, +// disable thread +// - dolt_stats_validate: return report of cache misses for current +// root value. +// +// `dolt_stats_wait` is additionally useful for blocking on a full +// queue cycle and then validating whether the session head is caught up. +// +// `dolt_stats_sync` can be used to grab the most up-to-date branch set +// for each database. This races with branch ticker and concurrent +// database/branch adds. +// diff --git a/go/libraries/doltcore/sqle/statspro/dolt_stats.go b/go/libraries/doltcore/sqle/statspro/dolt_stats.go deleted file mode 100644 index 4c5d43250c9..00000000000 --- a/go/libraries/doltcore/sqle/statspro/dolt_stats.go +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "fmt" - "sync" - "time" - - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/stats" - - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/val" -) - -type DoltStats struct { - Statistic *stats.Statistic - mu *sync.Mutex - // Chunks is a list of addresses for the histogram fanout level - Chunks []hash.Hash - // Active maps a chunk/bucket address to its position in - // the histogram. 1-indexed to differentiate from an empty - // field on disk - Active map[hash.Hash]int - Hist sql.Histogram - Tb *val.TupleBuilder -} - -func (s *DoltStats) Clone(_ context.Context) sql.JSONWrapper { - return s -} - -var _ sql.Statistic = (*DoltStats)(nil) - -func (s *DoltStats) SetChunks(h []hash.Hash) { - s.mu.Lock() - defer s.mu.Unlock() - s.Chunks = h -} - -func (s *DoltStats) WithColSet(set sql.ColSet) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithColSet(set).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithFuncDeps(set *sql.FuncDepSet) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithFuncDeps(set).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithDistinctCount(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithDistinctCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithRowCount(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithRowCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithNullCount(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithNullCount(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithAvgSize(u uint64) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithAvgSize(u).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) WithLowerBound(row sql.Row) sql.Statistic { - ret := *s - ret.Statistic = ret.Statistic.WithLowerBound(row).(*stats.Statistic) - return &ret -} - -func (s *DoltStats) RowCount() uint64 { - return s.Statistic.RowCount() -} - -func (s *DoltStats) DistinctCount() uint64 { - return s.Statistic.DistinctCount() -} - -func (s *DoltStats) NullCount() uint64 { - return s.Statistic.NullCount() - -} - -func (s *DoltStats) AvgSize() uint64 { - return s.Statistic.AvgSize() - -} - -func (s *DoltStats) CreatedAt() time.Time { - return s.Statistic.CreatedAt() - -} - -func (s *DoltStats) Columns() []string { - return s.Statistic.Columns() -} - -func (s *DoltStats) Types() []sql.Type { - return s.Statistic.Types() -} - -func (s *DoltStats) Qualifier() sql.StatQualifier { - return s.Statistic.Qualifier() -} - -func (s *DoltStats) IndexClass() sql.IndexClass { - return s.Statistic.IndexClass() -} - -func (s *DoltStats) FuncDeps() *sql.FuncDepSet { - return s.Statistic.FuncDeps() -} - -func (s *DoltStats) ColSet() sql.ColSet { - return s.Statistic.ColSet() -} - -func (s *DoltStats) LowerBound() sql.Row { - return s.Statistic.LowerBound() -} - -func NewDoltStats() *DoltStats { - return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}} -} - -func (s *DoltStats) ToInterface() (interface{}, error) { - statVal, err := s.Statistic.ToInterface() - if err != nil { - return nil, err - } - ret := statVal.(map[string]interface{}) - - var hist sql.Histogram - for _, b := range s.Hist { - hist = append(hist, b) - } - histVal, err := hist.ToInterface() - if err != nil { - return nil, err - } - ret["statistic"].(map[string]interface{})["buckets"] = histVal - return ret, nil -} - -func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) { - s.mu.Lock() - defer s.mu.Unlock() - ret := *s - ret.Hist = nil - for _, b := range h { - doltB, ok := b.(DoltBucket) - if !ok { - return nil, fmt.Errorf("invalid bucket type: %T, %s", b, h.DebugString()) - } - ret.Hist = append(ret.Hist, doltB) - } - return &ret, nil -} - -func (s *DoltStats) Histogram() sql.Histogram { - s.mu.Lock() - defer s.mu.Unlock() - return s.Hist -} - -func DoltStatsFromSql(stat sql.Statistic) (*DoltStats, error) { - hist, err := DoltHistFromSql(stat.Histogram(), stat.Types()) - if err != nil { - return nil, err - } - ret := &DoltStats{ - mu: &sync.Mutex{}, - Hist: hist, - Statistic: stats.NewStatistic(stat.RowCount(), stat.DistinctCount(), stat.NullCount(), stat.AvgSize(), stat.CreatedAt(), stat.Qualifier(), stat.Columns(), stat.Types(), nil, stat.IndexClass(), stat.LowerBound()), - Active: make(map[hash.Hash]int), - } - ret.Statistic.Fds = stat.FuncDeps() - ret.Statistic.Colset = stat.ColSet() - return ret, nil -} - -func (s *DoltStats) UpdateActive() { - s.mu.Lock() - defer s.mu.Unlock() - newActive := make(map[hash.Hash]int) - for i, hash := range s.Chunks { - newActive[hash] = i - } - s.Active = newActive -} - -type DoltHistogram []DoltBucket - -type DoltBucket struct { - Bucket *stats.Bucket - Chunk hash.Hash - Created time.Time -} - -func (d DoltBucket) RowCount() uint64 { - return d.Bucket.RowCount() -} - -func (d DoltBucket) DistinctCount() uint64 { - return d.Bucket.DistinctCount() -} - -func (d DoltBucket) NullCount() uint64 { - return d.Bucket.NullCount() -} - -func (d DoltBucket) BoundCount() uint64 { - return d.Bucket.BoundCount() -} - -func (d DoltBucket) UpperBound() sql.Row { - return d.Bucket.UpperBound() -} - -func (d DoltBucket) McvCounts() []uint64 { - return d.Bucket.McvCounts() -} - -func (d DoltBucket) Mcvs() []sql.Row { - return d.Bucket.Mcvs() -} - -func DoltBucketChunk(b sql.HistogramBucket) hash.Hash { - return b.(DoltBucket).Chunk -} - -func DoltBucketCreated(b sql.HistogramBucket) time.Time { - return b.(DoltBucket).Created -} - -var _ sql.HistogramBucket = (*DoltBucket)(nil) - -func DoltHistFromSql(hist sql.Histogram, types []sql.Type) (sql.Histogram, error) { - ret := make(sql.Histogram, len(hist)) - var err error - for i, b := range hist { - upperBound := make(sql.Row, len(b.UpperBound())) - for i, v := range b.UpperBound() { - upperBound[i], _, err = types[i].Convert(v) - if err != nil { - return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) - } - } - mcvs := make([]sql.Row, len(b.Mcvs())) - for i, mcv := range b.Mcvs() { - for _, v := range mcv { - conv, _, err := types[i].Convert(v) - if err != nil { - return nil, fmt.Errorf("failed to convert %v to type %s", v, types[i].String()) - } - mcvs[i] = append(mcvs[i], conv) - } - } - ret[i] = DoltBucket{ - Bucket: stats.NewHistogramBucket(b.RowCount(), b.DistinctCount(), b.NullCount(), b.BoundCount(), upperBound, b.McvCounts(), mcvs).(*stats.Bucket), - } - } - return ret, nil -} diff --git a/go/libraries/doltcore/sqle/statspro/initdbhook.go b/go/libraries/doltcore/sqle/statspro/initdbhook.go index 8e11408ea59..1a31a1055bd 100644 --- a/go/libraries/doltcore/sqle/statspro/initdbhook.go +++ b/go/libraries/doltcore/sqle/statspro/initdbhook.go @@ -15,10 +15,6 @@ package statspro import ( - "context" - "fmt" - "strings" - "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/dolt/go/libraries/doltcore/env" @@ -26,67 +22,29 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" ) -func NewStatsInitDatabaseHook( - statsProv *Provider, - ctxFactory func(ctx context.Context) (*sql.Context, error), - bThreads *sql.BackgroundThreads, -) sqle.InitDatabaseHook { +func NewInitDatabaseHook(sc *StatsCoord) sqle.InitDatabaseHook { return func( ctx *sql.Context, - pro *sqle.DoltDatabaseProvider, + _ *sqle.DoltDatabaseProvider, name string, denv *env.DoltEnv, db dsess.SqlDatabase, ) error { - dbName := strings.ToLower(db.Name()) - if statsDb, ok := statsProv.getStatDb(dbName); !ok { - statsDb, err := statsProv.sf.Init(ctx, db, statsProv.pro, denv.FS, env.GetCurrentUserHomeDir) - if err != nil { - ctx.GetLogger().Debugf("statistics load error: %s", err.Error()) - return nil - } - statsProv.setStatDb(dbName, statsDb) - } else { - dSess := dsess.DSessFromSess(ctx.Session) - for _, br := range statsDb.Branches() { - branchQDbName := BranchQualifiedDatabase(dbName, br) - sqlDb, err := dSess.Provider().Database(ctx, branchQDbName) - if err != nil { - ctx.GetLogger().Logger.Errorf("branch not found: %s", br) - continue - } - branchQDb, ok := sqlDb.(dsess.SqlDatabase) - if !ok { - return fmt.Errorf("branch/database not found: %s", branchQDbName) - } - - if ok, err := statsDb.SchemaChange(ctx, br, branchQDb); err != nil { - return err - } else if ok { - if err := statsDb.DeleteBranchStats(ctx, br, true); err != nil { - return err - } - } - } - ctx.GetLogger().Debugf("statistics init error: preexisting stats db: %s", dbName) + sqlDb, ok := db.(sqle.Database) + if !ok { + return nil } - ctx.GetLogger().Debugf("statistics refresh: initialize %s", name) - return statsProv.InitAutoRefresh(ctxFactory, name, bThreads) + + // call should only fail if backpressure in secondary queue + sc.AddFs(sqlDb, denv.FS) + return nil } } -func NewStatsDropDatabaseHook(statsProv *Provider) sqle.DropDatabaseHook { +func NewDropDatabaseHook(sc *StatsCoord) sqle.DropDatabaseHook { return func(ctx *sql.Context, name string) { - statsProv.CancelRefreshThread(name) - if err := statsProv.DropDbStats(ctx, name, false); err != nil { + if err := sc.DropDbStats(ctx, name, false); err != nil { ctx.GetLogger().Debugf("failed to close stats database: %s", err) } - - if db, ok := statsProv.getStatDb(name); ok { - if err := db.Close(); err != nil { - ctx.GetLogger().Debugf("failed to close stats database: %s", err) - } - delete(statsProv.statDbs, name) - } } } diff --git a/go/libraries/doltcore/sqle/statspro/interface.go b/go/libraries/doltcore/sqle/statspro/interface.go deleted file mode 100644 index 5a423466f91..00000000000 --- a/go/libraries/doltcore/sqle/statspro/interface.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2024 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/libraries/utils/filesys" - "github.com/dolthub/dolt/go/store/hash" -) - -// Database is a backing store for a collection of DoltStats. -// Each stats database tracks a user database, with multiple -// branches potentially each having their own statistics. -type Database interface { - // ListStatQuals returns the list of index statistics for a branch. - ListStatQuals(branch string) []sql.StatQualifier - // LoadBranchStats starts tracking a specific branch's statistics. - LoadBranchStats(ctx *sql.Context, branch string) error - // DeleteBranchStats removes references to in memory index statistics. - // If |flush| is true delete the data from storage. - DeleteBranchStats(ctx *sql.Context, branch string, flush bool) error - // GetStat returns a branch's index statistics. - GetStat(branch string, qual sql.StatQualifier) (*DoltStats, bool) - //SetStat bulk replaces the statistic, deleting any previous version - SetStat(ctx context.Context, branch string, qual sql.StatQualifier, stats *DoltStats) error - //DeleteStats deletes a list of index statistics. - DeleteStats(ctx *sql.Context, branch string, quals ...sql.StatQualifier) - // ReplaceChunks is an update interface that lets a stats implementation - // decide how to edit stats for a stats refresh. - ReplaceChunks(ctx context.Context, branch string, qual sql.StatQualifier, targetHashes []hash.Hash, dropChunks, newChunks []sql.HistogramBucket) error - // Flush instructs the database to sync any partial state to disk - Flush(ctx context.Context, branch string) error - // Close finalizes any file references. - Close() error - // SetTableHash updates the most recently tracked table stats table hash - SetTableHash(branch, tableName string, h hash.Hash) - // GetTableHash returns the most recently tracked table stats table hash - GetTableHash(branch, tableName string) hash.Hash - // SetSchemaHash updates the most recently stored table stat's schema hash - SetSchemaHash(ctx context.Context, branch, tableName string, h hash.Hash) error - // GetSchemaHash returns the schema hash for the latest stored statistics - GetSchemaHash(ctx context.Context, branch, tableName string) (hash.Hash, error) - // Branches returns the set of branches with tracked statistics databases - Branches() []string - // SchemaChange returns false if any table schema in the session - // root is incompatible with the latest schema used to create a stored - // set of statistics. - SchemaChange(ctx *sql.Context, branch string, branchQdb dsess.SqlDatabase) (bool, error) -} - -// StatsFactory instances construct statistic databases. -type StatsFactory interface { - // Init gets a reference to the stats database for a dolt database - // rooted at the given filesystem. It will create the database if - // it does not exist. - Init(ctx *sql.Context, sourceDb dsess.SqlDatabase, prov *sqle.DoltDatabaseProvider, fs filesys.Filesys, hdp env.HomeDirProvider) (Database, error) -} diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go new file mode 100644 index 00000000000..15d28e2115b --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue.go @@ -0,0 +1,366 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobqueue + +import ( + "context" + "errors" + "sync" + "sync/atomic" + + "github.com/dolthub/dolt/go/libraries/utils/circular" +) + +// A SerialQueue is a job queue which runs one job at a time. Jobs are +// run in the order they are submitted, with the exception that every +// interrupt job is run before any normal priority job. +// +// A SerialQueue can be paused, in which case it will accept new +// submissions, but will not run them until it is started again. +// +// A SerialQueue can be purged, which deletes any pending jobs from +// it. +// +// A SerialQueue can be stopped, in which case it will not accept new +// submissions and no pending work will be run. Stopping a queue does +// not purge it, but it is easy for a caller to stop and purge the +// queue. +// +// A stopped or paused SerialQueue can be started, which will cause it +// to start running submitted jobs again, including any unpurged jobs +// which were pending when it was stopped or paused. +// +// A SerialQueue runs background threads to coordinate its +// behavior. These background threads are launched with a `Context` +// supplied to its |Run| method. If that `Context` ever becomes +// `Done`, the SerialQueue termainally enters a completed state. +// +// In general, jobs running on the queue should not block indefinitely +// and should be very careful about any synchronization. It is safe +// for jobs within the queue to call DoAsync, InterruptAsync, Stop, +// Pause, Purge and Start on the queue itself. It is a deadlock for a +// job within the queue to perform a DoSync or InterruptSync on the +// queue itself, although that deadlock may be resolved if the +// provided |ctx| ends up |Done|. +type SerialQueue struct { + running atomic.Bool + + // If the queue is terminally completed, this will be closed. + // Submissions to the queue scheduler select on this channel + // to return errors if the scheduler is no longer accepting + // work. + completed chan struct{} + + runnerCh chan work + schedCh chan schedReq +} + +var ErrStoppedQueue = errors.New("stopped queue: cannot submit work to a stopped queue.") +var ErrCompletedQueue = errors.New("completed queue: the queue is no longer running.") + +// Create a new serial queue. All of the methods on the returned +// SerialQueue block indefinitely until its |Run| method is called. +func NewSerialQueue() *SerialQueue { + return &SerialQueue{ + completed: make(chan struct{}), + runnerCh: make(chan work), + schedCh: make(chan schedReq), + } +} + +// Run the serial queue's background threads with this |ctx|. If the +// |ctx| ever becomes |Done|, the queue enters a terminal completed +// state. It is an error to call this function more than once. +func (s *SerialQueue) Run(ctx context.Context) { + if !s.running.CompareAndSwap(false, true) { + panic("Cannot run a SerialQueue more than once.") + } + defer close(s.completed) + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + s.runScheduler(ctx) + }() + go func() { + defer wg.Done() + s.runRunner(ctx) + }() + wg.Wait() +} + +// Start the queue. The queue can be in any state, including already started. +func (s *SerialQueue) Start() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Start, + resp: make(chan schedResp, 1), + }) +} + +// Pause the queue. The queue can be in any state, including already +// paused. Note that pausing the queue does not block on any +// currently running job to complete. A pattern to pause the queue +// with a guarantee that nothing is currently running is: +// +// s.InterruptSync(context.Background(), func() { s.Pause() }) +func (s *SerialQueue) Pause() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Pause, + resp: make(chan schedResp, 1), + }) +} + +// Stop the queue. The queue can be in any state, including already +// stopped. Note that stopping the queue does not block on any +// currently running job to complete. +func (s *SerialQueue) Stop() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Stop, + resp: make(chan schedResp, 1), + }) +} + +// Purge the queue. All pending jobs will be dropped. +func (s *SerialQueue) Purge() error { + return s.makeReq(schedReq{ + reqType: schedReqType_Purge, + resp: make(chan schedResp, 1), + }) +} + +// Run a high priority job on the SerialQueue, blocking for its completion. +// If done against a Paused queue, this could block indefinitely. The +// block for completion is gated on the |ctx|. +func (s *SerialQueue) InterruptSync(ctx context.Context, f func()) error { + w, err := s.submitWork(schedPriority_High, f) + if err != nil { + return err + } + select { + case <-w.done: + return nil + case <-ctx.Done(): + return context.Cause(ctx) + case <-s.completed: + return ErrCompletedQueue + } +} + +// Run a normal priority job on the SerialQueue, blocking for its completion. +// When done against a paused queue, this can block indefinitely. +func (s *SerialQueue) DoSync(ctx context.Context, f func()) error { + w, err := s.submitWork(schedPriority_Normal, f) + if err != nil { + return err + } + select { + case <-w.done: + return nil + case <-ctx.Done(): + return context.Cause(ctx) + case <-s.completed: + return ErrCompletedQueue + } +} + +// Run a high priority job asynchronously on the queue. Returns once the +// job is accepted. +func (s *SerialQueue) InterruptAsync(f func()) error { + _, err := s.submitWork(schedPriority_High, f) + if err != nil { + return err + } + return nil +} + +// Run a normal priority job asynchronously on the queue. Returns once the +// job is accepted. +func (s *SerialQueue) DoAsync(f func()) error { + _, err := s.submitWork(schedPriority_Normal, f) + if err != nil { + return err + } + return nil +} + +// Helper function to submit work. Returns the work submitted, if it +// was successful, and an error otherwise. +func (s *SerialQueue) submitWork(pri schedPriority, f func()) (work, error) { + w := work{ + f: f, + done: make(chan struct{}), + } + err := s.makeReq(schedReq{ + reqType: schedReqType_Enqueue, + pri: pri, + work: w, + resp: make(chan schedResp, 1), + }) + if err != nil { + return work{}, err + } + return w, nil +} + +func (s *SerialQueue) makeReq(req schedReq) error { + select { + case s.schedCh <- req: + resp := <-req.resp + return resp.err + case <-s.completed: + return ErrCompletedQueue + } +} + +// Read off the input channels and maintain queues of pending work. +// Deliver that work to the runner channel if it is desired. +func (s *SerialQueue) runScheduler(ctx context.Context) { + state := schedState_Running + normalQ := circular.NewBuff[work](16) + highQ := circular.NewBuff[work](16) + for { + var sendWorkCh chan work + var sendWork work + var sentWorkCallback func() + + if state == schedState_Running { + if highQ.Len() > 0 { + sendWorkCh = s.runnerCh + sendWork = highQ.Front() + sentWorkCallback = highQ.Pop + } else if normalQ.Len() > 0 { + sendWorkCh = s.runnerCh + sendWork = normalQ.Front() + sentWorkCallback = normalQ.Pop + } + } + + select { + case msg := <-s.schedCh: + switch msg.reqType { + case schedReqType_Enqueue: + if state == schedState_Stopped { + msg.resp <- schedResp{ + err: ErrStoppedQueue, + } + } else { + if msg.pri == schedPriority_High { + highQ.Push(msg.work) + } else { + normalQ.Push(msg.work) + } + msg.resp <- schedResp{ + err: nil, + } + } + case schedReqType_Purge: + highQ = circular.NewBuff[work](highQ.Cap()) + normalQ = circular.NewBuff[work](normalQ.Cap()) + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Start: + state = schedState_Running + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Pause: + state = schedState_Paused + msg.resp <- schedResp{ + err: nil, + } + case schedReqType_Stop: + state = schedState_Stopped + msg.resp <- schedResp{ + err: nil, + } + } + case sendWorkCh <- sendWork: + // Pop from queue the work came from. + sentWorkCallback() + case <-ctx.Done(): + return + } + } +} + +// Read off the runner channel and run the submitted work. +func (s *SerialQueue) runRunner(ctx context.Context) { + for { + select { + case w := <-s.runnerCh: + w.f() + close(w.done) + case <-ctx.Done(): + return + } + } +} + +// |work| represents work to be run on the runner goroutine. +type work struct { + // The function to call. + f func() + // The channel to close after the work is run. + done chan struct{} +} + +type schedState int + +const ( + // When scheduler is running, it is willing to accept new work + // and to give work to the work thread. + schedState_Running schedState = iota + // When scheduler is paused, it is willing to accept new work + // but it does not give work to the work thread. + schedState_Paused + // When scheduler is stopped, it does not accept new work + // and it does not give work to the work thread. + schedState_Stopped +) + +type schedReqType int + +const ( + schedReqType_Enqueue schedReqType = iota + schedReqType_Purge + schedReqType_Start + schedReqType_Pause + schedReqType_Stop +) + +type schedPriority int + +const ( + schedPriority_Normal schedPriority = iota + schedPriority_High +) + +// Incoming message for the scheduler thread. +type schedReq struct { + reqType schedReqType + // Always set, the scheduler's response is + // sent through this channel. The send + // must never block. + resp chan schedResp + // Set when |reqType| is Enqueue + pri schedPriority + // Set when |reqType| is Enqueue + work work +} + +type schedResp struct { + err error +} diff --git a/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go new file mode 100644 index 00000000000..dd603cc7903 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/jobqueue/serialqueue_test.go @@ -0,0 +1,279 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package jobqueue + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestSerialQueue(t *testing.T) { + t.Run("CanceledRunContext", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + queue := NewSerialQueue() + // This should return. + queue.Run(ctx) + // Now all methods should return ErrCompletedQueue. + assert.ErrorIs(t, queue.Start(), ErrCompletedQueue) + assert.ErrorIs(t, queue.Pause(), ErrCompletedQueue) + assert.ErrorIs(t, queue.Stop(), ErrCompletedQueue) + assert.ErrorIs(t, queue.DoSync(context.Background(), func() {}), ErrCompletedQueue) + assert.ErrorIs(t, queue.DoAsync(func() {}), ErrCompletedQueue) + assert.ErrorIs(t, queue.InterruptSync(context.Background(), func() {}), ErrCompletedQueue) + assert.ErrorIs(t, queue.InterruptAsync(func() {}), ErrCompletedQueue) + }) + t.Run("StartsRunning", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + var ran bool + err := queue.DoSync(context.Background(), func() { + ran = true + }) + assert.NoError(t, err) + assert.True(t, ran, "the sync task ran.") + cancel() + wg.Wait() + }) + t.Run("StoppedQueueReturnsError", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + assert.NoError(t, queue.Stop()) + err := queue.DoSync(context.Background(), func() {}) + assert.ErrorIs(t, err, ErrStoppedQueue) + cancel() + wg.Wait() + }) + t.Run("PausedQueueDoesNotRun", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + assert.NoError(t, queue.Pause()) + var ran bool + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() { + ran = true + }) + assert.NoError(t, err) + } + cancel() + wg.Wait() + assert.False(t, ran, "work did not run on the paused queue.") + }) + t.Run("StartingPausedQueueRunsIt", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + assert.NoError(t, queue.Pause()) + var ran bool + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() { + ran = true + }) + assert.NoError(t, err) + } + assert.NoError(t, queue.Start()) + err := queue.DoSync(context.Background(), func() {}) + assert.NoError(t, err) + assert.True(t, ran, "work ran after the paused queue was started.") + cancel() + wg.Wait() + }) + t.Run("InterruptWorkRunsFirst", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + assert.NoError(t, queue.Pause()) + var cnt int + queue.DoAsync(func() { + assert.Equal(t, cnt, 2) + cnt += 1 + }) + queue.DoAsync(func() { + assert.Equal(t, cnt, 3) + cnt += 1 + }) + queue.InterruptAsync(func() { + assert.Equal(t, cnt, 0) + cnt += 1 + }) + queue.InterruptAsync(func() { + assert.Equal(t, cnt, 1) + cnt += 1 + }) + assert.NoError(t, queue.Start()) + assert.NoError(t, queue.DoSync(context.Background(), func() {})) + assert.Equal(t, cnt, 4) + cancel() + wg.Wait() + }) + t.Run("StopFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + var cnt int + for i := 0; i < 16; i++ { + // Some of these calls my error, since the queue + // will be stopped asynchronously. + queue.DoAsync(func() { + cnt += 1 + assert.NoError(t, queue.Stop()) + }) + } + assert.Equal(t, cnt, 1) + cancel() + wg.Wait() + }) + t.Run("PauseFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + var cnt int + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() { + cnt += 1 + assert.NoError(t, queue.Pause()) + }) + assert.NoError(t, err) + } + assert.Equal(t, cnt, 1) + cancel() + wg.Wait() + }) + t.Run("PurgeFromQueue", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + assert.NoError(t, queue.Pause()) + var cnt int + didRun := make(chan struct{}) + for i := 0; i < 16; i++ { + err := queue.DoAsync(func() { + cnt += 1 + assert.NoError(t, queue.Purge()) + close(didRun) + }) + assert.NoError(t, err) + } + assert.NoError(t, queue.Start()) + <-didRun + assert.NoError(t, queue.DoSync(context.Background(), func() {})) + assert.Equal(t, cnt, 1) + cancel() + wg.Wait() + }) + t.Run("DoSyncInQueueDeadlockWithContext", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + var cnt int + err := queue.DoSync(context.Background(), func() { + cnt += 1 + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + err := queue.DoSync(ctx, func() { + cnt += 1 + }) + assert.ErrorIs(t, err, context.DeadlineExceeded) + }) + assert.NoError(t, err) + assert.NoError(t, queue.DoSync(context.Background(), func() {})) + // Both tasks eventually ran... + assert.Equal(t, cnt, 2) + cancel() + wg.Wait() + }) + t.Run("SyncReturnsErrCompletedQueueAfterWorkAccepted", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + queue := NewSerialQueue() + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + queue.Run(ctx) + }() + queue.Pause() + var err error + var ran bool + wg.Add(1) + go func() { + defer wg.Done() + err = queue.InterruptSync(context.Background(), func() { + ran = true + }) + }() + wg.Add(1) + go func() { + defer wg.Done() + time.Sleep(100 * time.Millisecond) + queue.Stop() + }() + cancel() + wg.Wait() + assert.ErrorIs(t, err, ErrCompletedQueue) + assert.False(t, ran, "the interrupt task never ran.") + }) +} diff --git a/go/libraries/doltcore/sqle/statspro/noop_provider.go b/go/libraries/doltcore/sqle/statspro/noop_provider.go new file mode 100644 index 00000000000..204f1238e0e --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/noop_provider.go @@ -0,0 +1,82 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "github.com/dolthub/go-mysql-server/sql" + + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" +) + +type StatsNoop struct{} + +func (s StatsNoop) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { + return nil, nil +} + +func (s StatsNoop) RefreshTableStats(ctx *sql.Context, table sql.Table, db string) error { + return nil +} + +func (s StatsNoop) SetStats(ctx *sql.Context, stats sql.Statistic) error { + return nil +} + +func (s StatsNoop) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) { + return nil, false +} + +func (s StatsNoop) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error { + return nil +} + +func (s StatsNoop) DropDbStats(ctx *sql.Context, db string, flush bool) error { + return nil +} + +func (s StatsNoop) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) { + return 0, nil +} + +func (s StatsNoop) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) { + return 0, nil +} + +func (s StatsNoop) CancelRefreshThread(string) { + return +} + +func (s StatsNoop) StartRefreshThread(*sql.Context, dsess.DoltDatabaseProvider, string, *env.DoltEnv, dsess.SqlDatabase) error { + return nil +} + +func (s StatsNoop) ThreadStatus(string) string { + return "stats disabled" +} + +func (s StatsNoop) Prune(ctx *sql.Context) error { + return nil +} + +func (s StatsNoop) Purge(ctx *sql.Context) error { + return nil +} + +func (s StatsNoop) WaitForDbSync(ctx *sql.Context) error { + return nil +} + +var _ sql.StatsProvider = StatsNoop{} diff --git a/go/libraries/doltcore/sqle/statspro/provider.go b/go/libraries/doltcore/sqle/statspro/provider.go new file mode 100644 index 00000000000..88ab86b3f45 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/provider.go @@ -0,0 +1,405 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "fmt" + "path" + "path/filepath" + "strings" + + "github.com/dolthub/dolt/go/cmd/dolt/doltversion" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" + "github.com/dolthub/dolt/go/libraries/utils/earl" + "github.com/dolthub/dolt/go/store/types" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" +) + +var _ sql.StatsProvider = (*StatsCoord)(nil) + +func (sc *StatsCoord) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { + dSess := dsess.DSessFromSess(ctx.Session) + branch, err := dSess.GetBranch() + if err != nil { + return nil, err + } + key := tableIndexesKey{ + db: db, + branch: branch, + table: table.Name(), + } + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + st := sc.Stats[key] + var ret []sql.Statistic + for _, s := range st { + ret = append(ret, s) + } + return ret, nil +} + +func (sc *StatsCoord) RefreshTableStats(ctx *sql.Context, table sql.Table, dbName string) error { + dSess := dsess.DSessFromSess(ctx.Session) + + var branch string + if strings.Contains(dbName, "/") { + parts := strings.Split(dbName, "/") + if len(parts) == 2 { + dbName = parts[0] + branch = parts[1] + } + } + if branch == "" { + branch, err := dSess.GetBranch() + if err != nil { + return err + } + + if branch == "" { + branch = "main" + } + } + + db, err := sc.pro.Database(ctx, dbName) + sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), branch, branch+"/"+dbName) + if err != nil { + return err + } + + tableKey, newTableStats, err := sc.updateTable(ctx, table.Name(), sqlDb) + if err != nil { + return err + } + + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + sc.Stats[tableKey] = newTableStats + return nil +} + +func (sc *StatsCoord) SetStats(ctx *sql.Context, s sql.Statistic) error { + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + ss, ok := s.(*stats.Statistic) + if !ok { + return fmt.Errorf("expected *stats.Statistics, found %T", s) + } + key, err := sc.statsKey(ctx, ss.Qualifier().Db(), ss.Qualifier().Table()) + if err != nil { + return err + } + sc.Stats[key] = sc.Stats[key][:0] + sc.Stats[key] = append(sc.Stats[key], ss) + return nil +} + +func (sc *StatsCoord) GetStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) (sql.Statistic, bool) { + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + key, err := sc.statsKey(ctx, qual.Database, qual.Table()) + if err != nil { + return nil, false + } + for _, s := range sc.Stats[key] { + if strings.EqualFold(s.Qualifier().Index(), qual.Index()) { + return s, true + } + } + return nil, false +} + +func (sc *StatsCoord) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]*stats.Statistic, error) { + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + key := tableIndexesKey{ + db: db, + branch: branch, + table: table, + schema: schema, + } + return sc.Stats[key], nil +} + +func (sc *StatsCoord) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []string) error { + key, err := sc.statsKey(ctx, qual.Database, qual.Table()) + if err != nil { + return err + } + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + delete(sc.Stats, key) + return nil +} + +func (sc *StatsCoord) DropDbStats(ctx *sql.Context, dbName string, flush bool) error { + return sc.sq.InterruptSync(ctx, func() { + if strings.EqualFold(sc.statsBackingDb, dbName) { + delete(sc.dbFs, dbName) + if err := sc.rotateStorage(ctx); err != nil { + sc.descError("drop rotateStorage", err) + } + } + + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + var deleteKeys []tableIndexesKey + for k, _ := range sc.Stats { + if strings.EqualFold(dbName, k.db) { + deleteKeys = append(deleteKeys, k) + } + } + for _, k := range deleteKeys { + delete(sc.Stats, k) + } + }) +} + +func (sc *StatsCoord) statsKey(ctx *sql.Context, dbName, table string) (tableIndexesKey, error) { + dSess := dsess.DSessFromSess(ctx.Session) + branch, err := dSess.GetBranch() + if err != nil { + return tableIndexesKey{}, err + } + key := tableIndexesKey{ + db: dbName, + branch: branch, + table: table, + } + return key, nil +} + +func (sc *StatsCoord) RowCount(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) { + key, err := sc.statsKey(ctx, dbName, table.Name()) + if err != nil { + return 0, err + } + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + for _, s := range sc.Stats[key] { + if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") { + return s.RowCnt, nil + } + } + return 0, nil +} + +func (sc *StatsCoord) DataLength(ctx *sql.Context, dbName string, table sql.Table) (uint64, error) { + key, err := sc.statsKey(ctx, dbName, table.Name()) + if err != nil { + return 0, err + } + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + for _, s := range sc.Stats[key] { + if strings.EqualFold(s.Qualifier().Index(), "PRIMARY") { + return s.RowCnt, nil + } + } + return 0, nil +} + +func (sc *StatsCoord) Init(ctx context.Context, dbs []dsess.SqlDatabase, keepStorage bool) error { + sqlCtx, err := sc.ctxGen(ctx) + if err != nil { + return err + } + for i, db := range dbs { + if db, ok := db.(sqle.Database); ok { // exclude read replica dbs + fs, err := sc.pro.FileSystemForDatabase(db.AliasedName()) + if err != nil { + return err + } + sc.AddFs(db, fs) + if i == 0 && !keepStorage { + if err := sc.rotateStorage(sqlCtx); err != nil { + return err + } + } + } + } + return nil +} + +func (sc *StatsCoord) Purge(ctx *sql.Context) error { + if err := sc.rotateStorage(ctx); err != nil { + return err + } + if err := sc.kv.StartGc(ctx, 0); err != nil { + return err + } + return sc.kv.FinishGc(nil) +} + +func (sc *StatsCoord) rotateStorage(ctx *sql.Context) error { + if sc.statsBackingDb != "" { + if err := sc.rm(sc.statsBackingDb); err != nil { + return err + } + } + + var mem *memStats + switch kv := sc.kv.(type) { + case *prollyStats: + mem = kv.mem + case *memStats: + mem = kv + default: + mem = NewMemStats() + } + + if len(sc.dbFs) == 0 { + sc.kv = mem + sc.statsBackingDb = "" + return nil + } + + var newStorageTarget string + for db, _ := range sc.dbFs { + newStorageTarget = db + break + } + + if err := sc.rm(newStorageTarget); err != nil { + return err + } + + newKv, err := sc.initStorage(ctx, newStorageTarget) + if err != nil { + return err + } + + newKv.mem = mem + sc.kv = newKv + sc.statsBackingDb = newStorageTarget + return nil +} + +func (sc *StatsCoord) rm(db string) error { + fs, ok := sc.dbFs[db] + if !ok { + return fmt.Errorf("failed to remove stats db: %s filesys not found", db) + } + + statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) + if err != nil { + return err + } + + if ok, _ := statsFs.Exists(""); ok { + if err := statsFs.Delete("", true); err != nil { + return err + } + } + + dropDbLoc, err := statsFs.Abs("") + if err != nil { + return err + } + + if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil { + return err + } + return nil +} + +func (sc *StatsCoord) initStorage(ctx *sql.Context, storageTarget string) (*prollyStats, error) { + fs, ok := sc.dbFs[strings.ToLower(storageTarget)] + if !ok { + return nil, fmt.Errorf("failed to remove stats db: %s filesys not found", storageTarget) + } + + params := make(map[string]interface{}) + params[dbfactory.GRPCDialProviderParam] = sc.dialPro + + var urlPath string + u, err := earl.Parse(sc.pro.DbFactoryUrl()) + if u.Scheme == dbfactory.MemScheme { + urlPath = path.Join(sc.pro.DbFactoryUrl(), dbfactory.DoltDataDir) + } else if u.Scheme == dbfactory.FileScheme { + urlPath = doltdb.LocalDirDoltDB + } + + statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) + if err != nil { + return nil, err + } + + var dEnv *env.DoltEnv + exists, isDir := statsFs.Exists("") + if !exists { + err := statsFs.MkDirs("") + if err != nil { + return nil, fmt.Errorf("unable to make directory '%s', cause: %s", dbfactory.DoltStatsDir, err.Error()) + } + + dEnv = env.Load(ctx, sc.hdp, statsFs, urlPath, "test") + sess := dsess.DSessFromSess(ctx.Session) + err = dEnv.InitRepo(ctx, types.Format_Default, sess.Username(), sess.Email(), storageTarget) + if err != nil { + return nil, err + } + } else if !isDir { + return nil, fmt.Errorf("file exists where the dolt stats directory should be") + } else { + dEnv = env.LoadWithoutDB(ctx, sc.hdp, statsFs, "", doltversion.Version) + } + + if err := dEnv.LoadDoltDBWithParams(ctx, types.Format_Default, urlPath, statsFs, params); err != nil { + return nil, err + } + + deaf := dEnv.DbEaFactory(ctx) + + tmpDir, err := dEnv.TempTableFilesDir() + if err != nil { + return nil, err + } + opts := editor.Options{ + Deaf: deaf, + Tempdir: tmpDir, + } + statsDb, err := sqle.NewDatabase(ctx, "stats", dEnv.DbData(ctx), opts) + if err != nil { + return nil, err + } + return NewProllyStats(ctx, statsDb) +} + +func (sc *StatsCoord) WaitForDbSync(ctx *sql.Context) error { + // wait for the current partial + one full cycle to complete + for _ = range 2 { + done := sc.getCycleWaiter() + select { + case <-done: + case <-ctx.Done(): + return context.Cause(ctx) + } + } + return nil +} + +func (sc *StatsCoord) Gc(ctx *sql.Context) error { + sc.sq.InterruptAsync(func() { + sc.doGc = true + }) + return sc.WaitForDbSync(ctx) +} diff --git a/go/libraries/doltcore/sqle/statspro/scheduler.go b/go/libraries/doltcore/sqle/statspro/scheduler.go new file mode 100644 index 00000000000..83a0677ebf2 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/scheduler.go @@ -0,0 +1,220 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/statspro/jobqueue" + "log" + "sync" + "time" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/sirupsen/logrus" + + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/utils/filesys" +) + +type ctxFactory func(ctx context.Context) (*sql.Context, error) + +func NewStatsCoord(ctx context.Context, pro *sqle.DoltDatabaseProvider, ctxGen ctxFactory, logger *logrus.Logger, threads *sql.BackgroundThreads, dEnv *env.DoltEnv) *StatsCoord { + done := make(chan struct{}) + close(done) + kv := NewMemStats() + sq := jobqueue.NewSerialQueue() + go func() { + sq.Run(ctx) + }() + return &StatsCoord{ + statsMu: &sync.Mutex{}, + logger: logger, + JobInterval: 500 * time.Millisecond, + gcInterval: 24 * time.Hour, + branchInterval: 24 * time.Hour, + sq: sq, + Stats: make(map[tableIndexesKey][]*stats.Statistic), + dbFs: make(map[string]filesys.Filesys), + threads: threads, + senderDone: done, + cycleMu: &sync.Mutex{}, + kv: kv, + pro: pro, + hdp: dEnv.GetUserHomeDir, + dialPro: env.NewGRPCDialProviderFromDoltEnv(dEnv), + ctxGen: ctxGen, + } +} + +func (sc *StatsCoord) SetMemOnly(v bool) { + sc.memOnly = v +} + +func (sc *StatsCoord) SetEnableGc(v bool) { + sc.enableGc = v +} + +func (sc *StatsCoord) SetTimers(job, gc, branch int64) { + sc.JobInterval = time.Duration(job) + sc.gcInterval = time.Duration(gc) + sc.branchInterval = time.Duration(branch) +} + +type tableIndexesKey struct { + db string + branch string + table string + schema string +} + +func (k tableIndexesKey) String() string { + return k.db + "/" + k.branch + "/" + k.table +} + +type StatsCoord struct { + logger *logrus.Logger + threads *sql.BackgroundThreads + pro *sqle.DoltDatabaseProvider + statsBackingDb string + dialPro dbfactory.GRPCDialProvider + hdp env.HomeDirProvider + dbFs map[string]filesys.Filesys + + // ctxGen lets us fetch the most recent working root + ctxGen ctxFactory + + cycleMu *sync.Mutex + cycleCtx context.Context + cycleCancel context.CancelFunc + sq *jobqueue.SerialQueue + + senderDone chan struct{} + + JobInterval time.Duration + gcInterval time.Duration + branchInterval time.Duration + memOnly bool + enableGc bool + doGc bool + Debug bool + + // kv is a content-addressed cache of histogram objects: + // buckets, first bounds, and schema-specific statistic + // templates. + kv StatsKv + + // Stats tracks table statistics accessible to sessions. + Stats map[tableIndexesKey][]*stats.Statistic + statsMu *sync.Mutex + + dbCnt int + gcCnt int +} + +// Stop stops the sender thread and then pauses the queue +func (sc *StatsCoord) Stop(ctx context.Context) error { + return sc.sq.InterruptSync(ctx, func() { + sc.cancelSender() + select { + case <-ctx.Done(): + return + case <-sc.senderDone: + return + } + }) + if err := sc.sq.Pause(); err != nil { + return err + } +} + +// Restart continues the queue and blocks until sender is running +func (sc *StatsCoord) Restart(ctx context.Context) error { + sc.sq.Start() + return sc.sq.InterruptSync(ctx, func() { + sc.cancelSender() + select { + case <-ctx.Done(): + return + case <-sc.senderDone: + } + go func() { + sc.runSender(ctx) + }() + }) +} + +func (sc *StatsCoord) Close() { + sc.sq.Stop() + sc.cancelSender() + return +} + +func (sc *StatsCoord) AddFs(db dsess.SqlDatabase, fs filesys.Filesys) { + sc.dbFs[db.AliasedName()] = fs + return +} + +func (sc *StatsCoord) Info(ctx context.Context) (dprocedures.StatsInfo, error) { + sc.statsMu.Lock() + defer sc.statsMu.Unlock() + + cachedBucketCnt := sc.kv.Len() + var cachedBoundCnt int + var cachedTemplateCnt int + switch kv := sc.kv.(type) { + case *memStats: + cachedBoundCnt = len(kv.bounds) + cachedTemplateCnt = len(kv.templates) + case *prollyStats: + cachedBoundCnt = len(kv.mem.bounds) + cachedTemplateCnt = len(kv.mem.templates) + } + + statCnt := len(sc.Stats) + + storageCnt, err := sc.kv.Flush(ctx) + if err != nil { + return dprocedures.StatsInfo{}, err + } + var active bool + select { + case <-sc.senderDone: + default: + active = true + } + + return dprocedures.StatsInfo{ + DbCnt: sc.dbCnt, + Active: active, + CachedBucketCnt: cachedBucketCnt, + StorageBucketCnt: storageCnt, + CachedBoundCnt: cachedBoundCnt, + CachedTemplateCnt: cachedTemplateCnt, + StatCnt: statCnt, + GcCounter: sc.gcCnt, + }, nil +} + +func (sc *StatsCoord) descError(d string, err error) { + if sc.Debug { + log.Println("stats error: ", err.Error()) + } + sc.logger.Errorf("stats error; job detail: %s; verbose: %s", d, err) +} diff --git a/go/libraries/doltcore/sqle/statspro/scheduler_test.go b/go/libraries/doltcore/sqle/statspro/scheduler_test.go new file mode 100644 index 00000000000..f9d0848202e --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/scheduler_test.go @@ -0,0 +1,1124 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "fmt" + "io" + "log" + "os" + "strconv" + "strings" + "sync" + "testing" + "time" + + gms "github.com/dolthub/go-mysql-server" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/analyzer" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/branch_control" + "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" + "github.com/dolthub/dolt/go/libraries/doltcore/env" + "github.com/dolthub/dolt/go/libraries/doltcore/ref" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" +) + +func TestScheduleLoop(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + + { + // add more data + b := strings.Repeat("b", 100) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))")) + abIns := strings.Builder{} + abIns.WriteString("insert into ab values") + for i := range 200 { + if i > 0 { + abIns.WriteString(", ") + } + abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b)) + } + require.NoError(t, executeQuery(ctx, sqlEng, abIns.String())) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // 4 old + 2*7 new ab + kv := sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 7, len(stat[0].Hist)) + require.Equal(t, 7, len(stat[1].Hist)) + } + + require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy")) + + //doGcCycle(t, ctx, sc) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 14, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 2, len(stat)) + require.Equal(t, 7, len(stat[0].Hist)) + require.Equal(t, 7, len(stat[1].Hist)) +} + +func TestAnalyze(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (-1,-1)")) + require.NoError(t, executeQuery(ctx, sqlEng, "analyze table xy")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, uint64(0), sc.gcCnt) + require.Equal(t, 6, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + for _, tableStats := range sc.Stats { + require.Equal(t, 2, len(tableStats)) + } +} + +func TestModifyColumn(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + { + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy modify column y bigint")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 10, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 4, len(stat[0].Hist)) + require.Equal(t, 2, len(stat[1].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + require.Equal(t, 6, len(kv.buckets)) + } +} + +func TestAddColumn(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + { + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy add column z int")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) // +2 for new schema + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 2, len(stat[0].Hist)) + require.Equal(t, 2, len(stat[1].Hist)) + } +} + +func TestDropIndex(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + { + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 2, len(stat[0].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + kv = sc.kv.(*memStats) + require.Equal(t, 2, len(kv.buckets)) + require.Equal(t, 1, len(kv.bounds)) + require.Equal(t, 1, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 2, len(stat[0].Hist)) + } +} + +func TestDropTable(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + { + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0)")) + require.NoError(t, executeQuery(ctx, sqlEng, "drop table xy")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 1, len(stat[0].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + kv = sc.kv.(*memStats) + require.Equal(t, 1, len(kv.buckets)) + require.Equal(t, 1, len(kv.bounds)) + require.Equal(t, 1, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat = sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 1, len(stat)) + require.Equal(t, 1, len(stat[0].Hist)) + } +} + +func TestDeleteAboveBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y")) + + { + require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 498")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) // 1 for new chunk + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) // +1 for schema change + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 2, len(stat[0].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + require.Equal(t, 2, len(kv.buckets)) + } +} + +func TestDeleteBelowBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y")) + + { + require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 410")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + + require.Equal(t, 5, len(kv.buckets)) // +1 rewrite partial chunk + require.Equal(t, 3, len(kv.bounds)) // +1 rewrite first chunk + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 1, len(stat[0].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + require.Equal(t, 1, len(kv.buckets)) + } +} + +func TestDeleteOnBoundary(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + require.NoError(t, executeQuery(ctx, sqlEng, "alter table xy drop index y")) + + { + // PRIMARY boundary chunk -> rewrite y_idx's second + require.NoError(t, executeQuery(ctx, sqlEng, "delete from xy where x > 414")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv := sc.kv.(*memStats) + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) // +1 schema change + require.Equal(t, 1, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{db: "mydb", branch: "main", table: "xy"}] + require.Equal(t, 1, len(stat[0].Hist)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + require.Equal(t, 1, len(kv.buckets)) + } +} + +func TestAddDropDatabases(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + { + require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // xy and t + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}] + require.Equal(t, 1, len(stat)) + } + + dropHook := NewDropDatabaseHook(sc) + { + require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb")) + dropHook(ctx, "otherdb") + + _, ok := sc.Stats[tableIndexesKey{db: "otherdb", branch: "main", table: "t"}] + require.False(t, ok) + } +} + +func TestGC(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + + { + require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)")) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)")) + + dropHook := NewDropDatabaseHook(sc) + require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb")) + dropHook(ctx, "otherdb") + + require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // test for cleanup + kv := sc.kv.(*memStats) + require.Equal(t, 5, len(kv.buckets)) + require.Equal(t, 3, len(kv.bounds)) + require.Equal(t, 3, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats)) + } +} + +func TestBranches(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = true + + { + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add xy')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table t (i int primary key)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (0), (1)")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add t')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database thirddb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table s (i int primary key, j int, key (j))")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into s values (0,0), (1,1), (2,2)")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'add s')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_stop()")) + + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "use otherdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat2')")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into t values (2), (3)")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'insert into t')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat3')")) + require.NoError(t, executeQuery(ctx, sqlEng, "drop table t")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop t')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "use thirddb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', 'feat1')")) + require.NoError(t, executeQuery(ctx, sqlEng, "alter table s drop index j")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_commit('-Am', 'drop index j')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + stat, ok := sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}] + require.Equal(t, 1, len(stat)) + stat = sc.Stats[tableIndexesKey{"thirddb", "main", "s", ""}] + require.Equal(t, 2, len(stat)) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_restart()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}] + require.True(t, ok) + require.Equal(t, 2, len(stat)) + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.True(t, ok) + require.Equal(t, 1, len(stat)) + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat3", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"thirddb", "feat1", "s", ""}] + require.True(t, ok) + require.Equal(t, 1, len(stat)) + + // mydb: 4 shared + // otherdb: 1 + 1 + // thirddb: 2 + shared + kv := sc.kv.(*memStats) + require.Equal(t, 4+2+2, len(kv.buckets)) + require.Equal(t, 2+(1+1)+2, len(kv.bounds)) + require.Equal(t, 2+1+(2+1), len(kv.templates)) + require.Equal(t, 7-1, len(sc.Stats)) + + require.NoError(t, executeQuery(ctx, sqlEng, "drop database otherdb")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "feat2", "t", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"otherdb", "main", "t", ""}] + require.False(t, ok) + + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_branch('-D', 'feat1')")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + stat, ok = sc.Stats[tableIndexesKey{"mydb", "feat1", "xy", ""}] + require.False(t, ok) + stat, ok = sc.Stats[tableIndexesKey{"mydb", "main", "xy", ""}] + require.True(t, ok) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + // 3 dbs remaining, mydb/main, thirddb/feat1, thirddb/main + kv = sc.kv.(*memStats) + require.Equal(t, 4+2, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 5, len(kv.templates)) + require.Equal(t, 3, len(sc.Stats)) + } +} + +func TestBucketDoubling(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + + cur := sc.kv.(*memStats).buckets + newB := make(map[bucketKey]*stats.Bucket) + for k, v := range cur { + newB[k] = v + } + sc.kv.(*memStats).buckets = newB + + // add more data + b := strings.Repeat("b", 100) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))")) + abIns := strings.Builder{} + abIns.WriteString("insert into ab values") + for i := range 200 { + if i > 0 { + abIns.WriteString(", ") + } + abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b)) + } + require.NoError(t, executeQuery(ctx, sqlEng, abIns.String())) + + sc.enableGc = true + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // 4 old + 2*7 new ab + kv := sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 4, len(kv.bounds)) + require.Equal(t, 4, len(kv.templates)) + require.Equal(t, 2, len(sc.Stats)) + stat := sc.Stats[tableIndexesKey{"mydb", "main", "ab", ""}] + require.Equal(t, 7, len(stat[0].Hist)) + require.Equal(t, 7, len(stat[1].Hist)) +} + +func TestBucketCounting(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + sc.enableGc = false + + // add more data + b := strings.Repeat("b", 100) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(100), key (b,a))")) + abIns := strings.Builder{} + abIns.WriteString("insert into ab values") + for i := range 200 { + if i > 0 { + abIns.WriteString(", ") + } + abIns.WriteString(fmt.Sprintf("(%d, '%s')", i, b)) + } + require.NoError(t, executeQuery(ctx, sqlEng, abIns.String())) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // 4 old + 2*7 new ab + kv := sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 2, len(sc.Stats)) + + require.NoError(t, executeQuery(ctx, sqlEng, "create table cd (c int primary key, d varchar(200), key (d,c))")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into cd select a,b from ab")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + // no new buckets + kv = sc.kv.(*memStats) + require.Equal(t, 18, len(kv.buckets)) + require.Equal(t, 3, len(sc.Stats)) +} + +func TestDropOnlyDb(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, false) + + require.NoError(t, sc.Restart(ctx)) + + _, ok := sc.kv.(*prollyStats) + require.True(t, ok) + require.Equal(t, "mydb", sc.statsBackingDb) + + // what happens when we drop the only database? swap to memory? + // add first database, switch to prolly? + require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + require.NoError(t, sc.Stop(context.Background())) + + // empty memory KV + _, ok = sc.kv.(*memStats) + require.True(t, ok) + require.Equal(t, "", sc.statsBackingDb) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database otherdb")) + + // empty prollyKv + _, ok = sc.kv.(*prollyStats) + require.True(t, ok) + require.Equal(t, "otherdb", sc.statsBackingDb) +} + +func TestRotateBackingDb(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, false) + + require.NoError(t, executeQuery(ctx, sqlEng, "create database backupdb")) + + require.NoError(t, executeQuery(ctx, sqlEng, "use backupdb")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + require.Equal(t, 5, sc.kv.Len()) + require.Equal(t, 2, len(sc.Stats)) + + require.NoError(t, executeQuery(ctx, sqlEng, "drop database mydb")) + + _, ok := sc.kv.(*prollyStats) + require.True(t, ok) + require.Equal(t, "backupdb", sc.statsBackingDb) + + // lost the backing storage, previous in-memory moves into new kv + require.Equal(t, 5, sc.kv.Len()) + require.Equal(t, 1, len(sc.Stats)) + +} + +func TestReadCounter(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := defaultSetup(t, threads, true) + + { + si, err := sc.Info(ctx) + require.NoError(t, err) + require.Equal(t, 0, si.ReadCnt) + + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (501, 0)")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + si, err = sc.Info(ctx) + require.NoError(t, err) + require.Equal(t, 2, si.ReadCnt) + } +} + +func TestPanic(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + + require.NoError(t, sc.Restart(ctx)) + + sc.sq.DoSync(ctx, func() { + panic("test panic") + }) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) +} + +func TestPurge(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + + require.NoError(t, sc.Restart(ctx)) + + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y varchar(10), key (y,x))")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0), (1,1), (2,2)")) + require.NoError(t, executeQuery(ctx, sqlEng, "create database other")) + require.NoError(t, executeQuery(ctx, sqlEng, "use other")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table ab (a int primary key, b varchar(10), key (b,a))")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into ab values (0,0), (1,1), (2,2)")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + require.NoError(t, sc.Stop(context.Background())) + + kv := sc.kv.(*prollyStats) + require.Equal(t, 2, kv.Len()) + require.Equal(t, 4, len(kv.mem.templates)) + require.Equal(t, 2, len(kv.mem.bounds)) + m, err := kv.m.Map(ctx) + require.NoError(t, err) + cmpCnt, err := m.Count() + require.NoError(t, err) + require.Equal(t, 2, cmpCnt) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + + kv = sc.kv.(*prollyStats) + require.Equal(t, 0, kv.Len()) + require.Equal(t, 0, len(kv.mem.templates)) + require.Equal(t, 0, len(kv.mem.bounds)) + m, err = kv.m.Map(ctx) + require.NoError(t, err) + cmpCnt, err = m.Count() + require.NoError(t, err) + require.Equal(t, 0, cmpCnt) +} + +func emptySetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord) { + dEnv := dtestutils.CreateTestEnv() + sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + + sql.SystemVariables.AssignValues(map[string]interface{}{ + dsess.DoltStatsGCInterval: 100, + dsess.DoltStatsBranchInterval: 100, + dsess.DoltStatsJobInterval: 1, + }) + + sc := sqlEng.Analyzer.Catalog.StatsProvider.(*StatsCoord) + sc.SetEnableGc(false) + sc.JobInterval = time.Nanosecond + + require.NoError(t, sc.Restart(ctx)) + + ctx, _ = sc.ctxGen(ctx) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, sc.Stop(context.Background())) + + var sqlDbs []sqle.Database + for _, db := range sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) { + if sqlDb, ok := db.(sqle.Database); ok { + branch := ref.NewBranchRef("main") + db, err := sqle.RevisionDbForBranch(ctx, sqlDb, branch.GetPath(), branch.GetPath()+"/"+sqlDb.AliasedName()) + require.NoError(t, err) + sqlDbs = append(sqlDbs, db.(sqle.Database)) + } + } + + if memOnly { + statsKv := NewMemStats() + sc.kv = statsKv + } + + return ctx, sqlEng, sc +} + +func defaultSetup(t *testing.T, threads *sql.BackgroundThreads, memOnly bool) (*sql.Context, *gms.Engine, *StatsCoord) { + ctx, sqlEng, sc := emptySetup(t, threads, memOnly) + //sc.Debug = true + + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int, key (y,x))")) + + xyIns := strings.Builder{} + xyIns.WriteString("insert into xy values") + for i := range 500 { + if i > 0 { + xyIns.WriteString(", ") + } + xyIns.WriteString(fmt.Sprintf("(%d, %d)", i, i%25)) + } + require.NoError(t, executeQuery(ctx, sqlEng, xyIns.String())) + + var kv *memStats + switch s := sc.kv.(type) { + case *memStats: + kv = s + case *prollyStats: + kv = s.mem + } + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + for _, tableStats := range sc.Stats { + require.Equal(t, 2, len(tableStats)) + } + + switch s := sc.kv.(type) { + case *memStats: + kv = s + case *prollyStats: + kv = s.mem + } + require.Equal(t, 4, len(kv.buckets)) + require.Equal(t, 2, len(kv.bounds)) + require.Equal(t, 2, len(kv.templates)) + require.Equal(t, 1, len(sc.Stats)) + for _, tableStats := range sc.Stats { + require.Equal(t, 2, len(tableStats)) + } + + return ctx, sqlEng, sc +} + +func executeQuery(ctx *sql.Context, eng *gms.Engine, query string) error { + _, iter, _, err := eng.Query(ctx, query) + if err != nil { + return err + } + for { + _, err = iter.Next(ctx) + if err == io.EOF { + break + } + if err != nil { + return err + } + } + return iter.Close(ctx) // tx commit +} + +func executeQueryResults(ctx *sql.Context, eng *gms.Engine, query string) ([]sql.Row, error) { + _, iter, _, err := eng.Query(ctx, query) + if err != nil { + return nil, err + } + var ret []sql.Row + for { + r, err := iter.Next(ctx) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + ret = append(ret, r) + } + return ret, iter.Close(ctx) // tx commit +} + +func newTestEngine(ctx context.Context, dEnv *env.DoltEnv, threads *sql.BackgroundThreads) (*gms.Engine, *sql.Context) { + pro, err := sqle.NewDoltDatabaseProviderWithDatabases("main", dEnv.FS, nil, nil, threads) + if err != nil { + panic(err) + } + + mrEnv, err := env.MultiEnvForDirectory(ctx, dEnv.Config.WriteableConfig(), dEnv.FS, dEnv.Version, dEnv) + if err != nil { + panic(err) + } + + sc := NewStatsCoord(ctx, pro, nil, logrus.StandardLogger(), threads, dEnv) + + gcSafepointController := dsess.NewGCSafepointController() + + doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController) + if err != nil { + panic(err) + } + + sqlCtx := sql.NewContext(ctx, sql.WithSession(doltSession)) + sqlCtx.SetCurrentDatabase(mrEnv.GetFirstDatabase()) + + sc.ctxGen = func(ctx context.Context) (*sql.Context, error) { + doltSession, err := dsess.NewDoltSession(sql.NewBaseSession(), pro, dEnv.Config.WriteableConfig(), branch_control.CreateDefaultController(ctx), sc, writer.NewWriteSession, gcSafepointController) + if err != nil { + return nil, err + } + return sql.NewContext(ctx, sql.WithSession(doltSession)), nil + } + + pro.InitDatabaseHooks = append(pro.InitDatabaseHooks, NewInitDatabaseHook(sc)) + pro.DropDatabaseHooks = append(pro.DropDatabaseHooks, NewDropDatabaseHook(sc)) + + sqlEng := gms.New(analyzer.NewBuilder(pro).Build(), &gms.Config{ + IsReadOnly: false, + IsServerLocked: false, + }) + sqlEng.Analyzer.Catalog.StatsProvider = sc + return sqlEng, sqlCtx +} + +func TestStatsGcConcurrency(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + sc.JobInterval = 1 * time.Nanosecond + sc.gcInterval = 100 * time.Nanosecond + sc.branchInterval = 50 * time.Nanosecond + require.NoError(t, sc.Restart(ctx)) + + addDb := func(ctx *sql.Context, dbName string) { + require.NoError(t, executeQuery(ctx, sqlEng, "create database "+dbName)) + } + + addData := func(ctx *sql.Context, dbName string, i int) { + //log.Println("add ", dbName) + require.NoError(t, executeQuery(ctx, sqlEng, "use "+dbName)) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + } + + dropDb := func(dropCtx *sql.Context, dbName string) { + //log.Println("drop ", dbName) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "drop database "+dbName)) + } + + // it is important to use new sessions for this test, to avoid working root conflicts + addCtx, _ := sc.ctxGen(context.Background()) + writeCtx, _ := sc.ctxGen(context.Background()) + dropCtx, _ := sc.ctxGen(context.Background()) + + iters := 200 + dbs := make(chan string, iters) + + { + wg := sync.WaitGroup{} + wg.Add(2) + + addCnt := 0 + go func() { + for i := range iters { + addCnt++ + dbName := "db" + strconv.Itoa(i) + addDb(addCtx, dbName) + addData(writeCtx, dbName, i) + dbs <- dbName + } + close(dbs) + wg.Done() + }() + + dropCnt := 0 + go func() { + i := 0 + for db := range dbs { + if i%2 == 0 { + time.Sleep(50 * time.Millisecond) + dropCnt++ + dropDb(dropCtx, db) + } + i++ + } + wg.Done() + }() + + wg.Wait() + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + require.NoError(t, sc.Stop(context.Background())) + + // 101 dbs, 100 with stats (not main) + require.Equal(t, iters/2, len(sc.Stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters/2, sc.kv.Len()) + } +} + +func TestStatsBranchConcurrency(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + + sc.JobInterval = 10 + sc.gcInterval = time.Hour + sc.branchInterval = time.Hour + require.NoError(t, sc.Restart(ctx)) + + addBranch := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')")) + } + + addData := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + //require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + err := executeQuery(ctx, sqlEng, "call dolt_stats_sync()") + for err != nil { + log.Println("add waiting on: ", err.Error()) + err = executeQuery(ctx, sqlEng, "call dolt_stats_sync()") + } + } + + dropBranch := func(dropCtx *sql.Context, branchName string) { + //log.Println("delete branch: ", branchName) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + del := "call dolt_branch('-d', '" + branchName + "')" + require.NoError(t, executeQuery(ctx, sqlEng, del)) + } + + // it is important to use new sessions for this test, to avoid working root conflicts + addCtx, _ := sc.ctxGen(context.Background()) + dropCtx, _ := sc.ctxGen(context.Background()) + + iters := 100 + { + branches := make(chan string, iters) + + wg := sync.WaitGroup{} + wg.Add(2) + + go func() { + for i := range iters { + addBranch(addCtx, i) + addData(addCtx, i) + branches <- "branch" + strconv.Itoa(i) + } + close(branches) + wg.Done() + }() + + go func() { + i := 0 + for br := range branches { + if i%2 == 0 { + dropBranch(dropCtx, br) + time.Sleep(50 * time.Millisecond) + } + i++ + } + wg.Done() + }() + + wg.Wait() + + err := executeQuery(ctx, sqlEng, "call dolt_stats_sync()") + for err != nil { + log.Println("waiting on final branch sync", err) + err = executeQuery(ctx, sqlEng, "call dolt_stats_sync()") + } + err = executeQuery(ctx, sqlEng, "call dolt_stats_gc()") + for err != nil { + log.Println("waiting on final Gc", err) + err = executeQuery(ctx, sqlEng, "call dolt_stats_gc()") + } + require.NoError(t, sc.Stop(context.Background())) + + // at the end we should still have |iters/2| databases + require.Equal(t, iters/2, len(sc.Stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters/2, sc.kv.Len()) + } +} + +func TestStatsCacheGrowth(t *testing.T) { + //t.Skip("expensive test") + + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + + sc.JobInterval = 10 + sc.gcInterval = time.Hour + sc.branchInterval = time.Hour + require.NoError(t, sc.Restart(ctx)) + + addBranch := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('main')")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('-b', '"+branchName+"')")) + } + + addData := func(ctx *sql.Context, i int) { + branchName := "branch" + strconv.Itoa(i) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_checkout('"+branchName+"')")) + require.NoError(t, executeQuery(ctx, sqlEng, "create table xy (x int primary key, y int)")) + require.NoError(t, executeQuery(ctx, sqlEng, "insert into xy values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5), (6,"+strconv.Itoa(i)+")")) + + } + + iters := 2000 + if os.Getenv("CI") != "" { + iters = 1025 + } + { + branches := make(chan string, iters) + + go func() { + addCtx, _ := sc.ctxGen(context.Background()) + for i := range iters { + addBranch(addCtx, i) + addData(addCtx, i) + branches <- "branch" + strconv.Itoa(i) + if i%500 == 0 { + log.Println("branches: ", strconv.Itoa(i)) + for { + syncErr := executeQuery(addCtx, sqlEng, "call dolt_stats_sync()") + waitErr := executeQuery(addCtx, sqlEng, "call dolt_stats_wait()") + if waitErr == nil && syncErr == nil { + break + } else if syncErr != nil { + log.Println("waiting on: ", strconv.Itoa(i), syncErr.Error()) + } else if syncErr != nil { + log.Println("waiting on: ", strconv.Itoa(i), waitErr.Error()) + } + } + } + } + close(branches) + }() + + //waitCtx, _ := sc.ctxGen(context.Background()) + i := 0 + for _ = range branches { + //if i%50 == 0 { + // log.Println("branches: ", strconv.Itoa(i)) + // require.NoError(t, executeQuery(waitCtx, sqlEng, "call dolt_stats_wait()")) + //} + i++ + } + + executeQuery(ctx, sqlEng, "call dolt_stats_wait()") + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + require.NoError(t, sc.Stop(context.Background())) + + // at the end we should still have |iters/2| databases + require.Equal(t, iters, len(sc.Stats)) + //require.NoError(t, sc.ValidateState(ctx)) + require.Equal(t, iters, sc.kv.Len()) + } +} diff --git a/go/libraries/doltcore/sqle/statspro/script_test.go b/go/libraries/doltcore/sqle/statspro/script_test.go new file mode 100644 index 00000000000..fc0f9529cd5 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/script_test.go @@ -0,0 +1,738 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "log" + "strconv" + "testing" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dprocedures" +) + +type scriptTest struct { + name string + setup []string + assertions []assertion +} + +type assertion struct { + query string + res []sql.Row + err string +} + +func TestStatScripts(t *testing.T) { + threads := sql.NewBackgroundThreads() + defer threads.Shutdown() + + scripts := []scriptTest{ + { + name: "track updates", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'zero'), (1, 'one')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + { + query: "update xy set y = 2 where x between 100 and 800", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + }, + }, + { + name: "track deletes", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'zero'), (1, 'one')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(9)}}, + }, + { + query: "delete from xy where x > 600", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(5)}}, + }, + }, + }, + { + name: "ddl table", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "truncate table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + { + query: "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "drop table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + }, + }, + { + name: "ddl index", + setup: []string{ + "create table xy (x int primary key, y varchar(16), key (y,x))", + "insert into xy values (0,'0'), (1,'0'), (2,'0')", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "alter table xy drop index y", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(1)}}, + }, + { + query: "alter table xy add index yx (y,x)", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "select types, upper_bound from dolt_statistics where index_name = 'yx'", + res: []sql.Row{{"varchar(16),int", "0,2"}}, + }, + { + query: "alter table xy modify column y int", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select types, upper_bound from dolt_statistics where index_name = 'yx'", + res: []sql.Row{{"int,int", "0,2"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + }, + }, + { + name: "mcv counts", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "alter table xy add index y2 (y)", + "alter table xy add index x2 (x,y)", + "insert into xy values (0,0), (1,0), (2,0), (3,0), (4,0), (5,0), (6,1), (7,1), (8,1), (9,1),(10,3),(11,4),(12,5),(13,6),(14,7),(15,8),(16,9),(17,10),(18,11)", + }, + assertions: []assertion{ + { + query: "select mcv1, mcv2, mcv_counts from dolt_statistics where index_name = 'y2'", + res: []sql.Row{{"1", "0", "4,6"}}, + }, + { + query: "select mcv_counts from dolt_statistics where index_name = 'y'", + res: []sql.Row{{""}}, + }, + { + query: "select mcv_counts from dolt_statistics where index_name = 'x2'", + res: []sql.Row{{""}}, + }, + }, + }, + { + name: "caps testing", + setup: []string{ + "create table XY (x int primary key, Y int, key Yx (Y,x))", + "alter table xy add index y2 (y)", + "insert into xy values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{{"mydb", "xy", "primary"}, {"mydb", "xy", "y2"}, {"mydb", "xy", "yx"}}, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(3)}}, + }, + { + query: "insert into xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(12)}}, + }, + { + query: "delete from xy where x > 500", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(6)}}, + }, + }, + }, + { + name: "database ddl", + setup: []string{ + "create table mydb.xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "create database repo2", + "create table repo2.xy (x int primary key, y int, key (y,x))", + "insert into repo2.xy values (0,0), (1,0), (2,0)", + "create table repo2.ab (a int primary key, b int, key (b,a))", + "insert into repo2.ab values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{ + {"mydb", "xy", "primary"}, {"mydb", "xy", "y"}, + }, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "select database_name, table_name, index_name from repo2.dolt_statistics order by index_name", + res: []sql.Row{ + {"repo2", "ab", "b"}, {"repo2", "ab", "primary"}, + {"repo2", "xy", "primary"}, {"repo2", "xy", "y"}, + }, + }, + { + query: "use repo2", + }, + { + query: "select database_name, table_name, index_name from dolt_statistics order by index_name", + res: []sql.Row{ + {"repo2", "ab", "b"}, {"repo2", "ab", "primary"}, + {"repo2", "xy", "primary"}, {"repo2", "xy", "y"}, + }, + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(4)}}, + }, + { + query: "insert into repo2.xy select x, 1 from (with recursive inputs(x) as (select 4 union select x+1 from inputs where x < 1000) select * from inputs) dt;", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(10)}}, + }, + { + query: "drop database repo2", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "use mydb", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + }, + }, + { + name: "recreate table without index", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(2)}}, + }, + { + query: "drop table xy", + }, + { + query: "create table xy (x int primary key, y int)", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "select count(*) from dolt_statistics", + res: []sql.Row{{int64(0)}}, + }, + }, + }, + { + name: "stats info", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_checkout('feat')", + }, + { + query: "drop table xy", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_gc()", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_gc()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + GcCounter: 3, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_checkout('main')", + }, + { + query: "call dolt_branch('-D', 'feat')", + }, + { + query: "call dolt_stats_sync()", + }, + { + query: "call dolt_stats_gc()", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 1, + ReadCnt: 0, + Active: true, + DbSeedCnt: 1, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + GcCounter: 4, + SyncCounter: 2, + }.ToJson(), + }}, + }, + }, + }, + { + name: "stats stop/start", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_stats_stop()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: false, + DbSeedCnt: 0, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_stats_restart()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + }, + }, + { + name: "stats purge", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "insert into xy values (3,0)", + }, + { + query: "call dolt_checkout('feat')", + }, + { + query: "insert into xy values (3,0)", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 4, + CachedBucketCnt: 4, + CachedBoundCnt: 4, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_stats_purge()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: false, + DbSeedCnt: 2, + StorageBucketCnt: 0, + CachedBucketCnt: 0, + CachedBoundCnt: 0, + CachedTemplateCnt: 0, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_stats_restart()", + }, + { + query: "call dolt_stats_wait()", + }, + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + }, + }, + { + name: "stats validate", + setup: []string{ + "create table xy (x int primary key, y int, key (y,x))", + "insert into xy values (0,0), (1,0), (2,0)", + "call dolt_add('-A')", + "call dolt_commit('-m', 'create xy')", + "call dolt_checkout('-b', 'feat')", + "call dolt_checkout('main')", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info()", + res: []sql.Row{ + {dprocedures.StatsInfo{ + DbCnt: 2, + ReadCnt: 0, + Active: true, + DbSeedCnt: 2, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 2, + GcCounter: 1, + SyncCounter: 1, + }.ToJson(), + }}, + }, + { + query: "call dolt_stats_stop()", + }, + { + query: "create table ab (a int primary key, b int)", + }, + { + query: "insert into ab values (0,0), (1,1), (2,2)", + }, + { + query: "call dolt_stats_validate()", + err: "(mydb/main) missing template (PRIMARY/e29in)\n(mydb/main) missing bound (d9aov)\n(mydb/main) missing chunk (d9aov)\n", + }, + { + query: "call dolt_stats_restart()", + }, + { + query: "call dolt_stats_validate()", + res: []sql.Row{{"Ok"}}, + }, + }, + }, + { + name: "null bounds", + setup: []string{ + "create table xy (x int primary key, y int, key (y))", + "insert into xy values (0,NULL), (1,0), (2,0)", + }, + assertions: []assertion{ + { + query: "call dolt_stats_info()", + res: []sql.Row{{dprocedures.StatsInfo{ + DbCnt: 1, + ReadCnt: 0, + Active: true, + DbSeedCnt: 1, + StorageBucketCnt: 2, + CachedBucketCnt: 2, + CachedBoundCnt: 2, + CachedTemplateCnt: 2, + StatCnt: 1, + GcCounter: 1, + SyncCounter: 1, + }.ToJson()}}, + }, + }, + }, + } + + for _, tt := range scripts { + t.Run(tt.name, func(t *testing.T) { + ctx, sqlEng, sc := emptySetup(t, threads, false) + sc.SetEnableGc(true) + + require.NoError(t, sc.Restart(ctx)) + + //sc.Debug = true + + for _, s := range tt.setup { + require.NoError(t, executeQuery(ctx, sqlEng, s)) + } + + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_sync()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_wait()")) + require.NoError(t, executeQuery(ctx, sqlEng, "call dolt_stats_gc()")) + + for i, a := range tt.assertions { + log.Println(a.query) + rows, err := executeQueryResults(ctx, sqlEng, a.query) + if a.err != "" { + require.Equal(t, a.err, err.Error()) + } else { + require.NoError(t, err) + } + if a.res != nil { + require.Equal(t, a.res, rows, strconv.Itoa(i)+": "+a.query) + } + } + }) + } +} diff --git a/go/libraries/doltcore/sqle/statspro/seed_job.go b/go/libraries/doltcore/sqle/statspro/seed_job.go new file mode 100644 index 00000000000..19ba2d9470d --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/seed_job.go @@ -0,0 +1,124 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "fmt" + "strings" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/store/hash" +) + +// GetLatestTable will get the WORKING root table for the current database/branch +func GetLatestTable(ctx *sql.Context, tableName string, sqlDb sql.Database) (*sqle.DoltTable, *doltdb.Table, error) { + var db sqle.Database + switch d := sqlDb.(type) { + case sqle.Database: + db = d + case sqle.ReadReplicaDatabase: + db = d.Database + default: + return nil, nil, fmt.Errorf("expected sqle.Database, found %T", sqlDb) + } + sqlTable, ok, err := db.GetTableInsensitive(ctx, tableName) + if err != nil { + return nil, nil, err + } + if !ok { + return nil, nil, fmt.Errorf("statistics refresh error: table not found %s", tableName) + } + + var dTab *doltdb.Table + var sqleTable *sqle.DoltTable + switch t := sqlTable.(type) { + case *sqle.AlterableDoltTable: + sqleTable = t.DoltTable + dTab, err = t.DoltTable.DoltTable(ctx) + case *sqle.WritableDoltTable: + sqleTable = t.DoltTable + dTab, err = t.DoltTable.DoltTable(ctx) + case *sqle.DoltTable: + sqleTable = t + dTab, err = t.DoltTable(ctx) + default: + err = fmt.Errorf("failed to unwrap dolt table from type: %T", sqlTable) + } + if err != nil { + return nil, nil, err + } + return sqleTable, dTab, nil +} + +type templateCacheKey struct { + h hash.Hash + idxName string +} + +func (k templateCacheKey) String() string { + return k.idxName + "/" + k.h.String()[:5] +} + +func (sc *StatsCoord) getTemplate(ctx *sql.Context, sqlTable *sqle.DoltTable, sqlIdx sql.Index) (templateCacheKey, stats.Statistic, error) { + schHash, _, err := sqlTable.IndexCacheKey(ctx) + key := templateCacheKey{h: schHash.Hash, idxName: sqlIdx.ID()} + if template, ok := sc.kv.GetTemplate(key); ok { + return key, template, nil + } + fds, colset, err := stats.IndexFds(strings.ToLower(sqlTable.Name()), sqlTable.Schema(), sqlIdx) + if err != nil { + return templateCacheKey{}, stats.Statistic{}, err + } + + var class sql.IndexClass + switch { + case sqlIdx.IsSpatial(): + class = sql.IndexClassSpatial + case sqlIdx.IsFullText(): + class = sql.IndexClassFulltext + default: + class = sql.IndexClassDefault + } + + var types []sql.Type + for _, cet := range sqlIdx.ColumnExpressionTypes() { + types = append(types, cet.Type) + } + + tablePrefix := sqlTable.Name() + "." + cols := make([]string, len(sqlIdx.Expressions())) + for i, c := range sqlIdx.Expressions() { + cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) + } + + template := stats.Statistic{ + Cols: cols, + Typs: types, + IdxClass: uint8(class), + Fds: fds, + Colset: colset, + } + + // We put template twice, once for schema changes with no data + // changes (here), and once when we put chunks to avoid GC dropping + // templates before the finalize job. + sc.kv.PutTemplate(key, template) + + return key, template, nil +} diff --git a/go/libraries/doltcore/sqle/statspro/sender.go b/go/libraries/doltcore/sqle/statspro/sender.go new file mode 100644 index 00000000000..37fbf3f59a0 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/sender.go @@ -0,0 +1,315 @@ +package statspro + +import ( + "context" + "errors" + "fmt" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" + "github.com/dolthub/dolt/go/libraries/doltcore/ref" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/prolly" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "io" + "log" + "strings" +) + +// thread that does a full root walk, gets databases/branches/tables + +// control work throughput on sender or receiver side? + +// + +func (sc *StatsCoord) newCycle(ctx context.Context) context.Context { + sc.cycleMu.Lock() + defer sc.cycleMu.Unlock() + if sc.cycleCancel != nil { + sc.cycleCancel() + } + sc.cycleCtx, sc.cycleCancel = context.WithCancel(ctx) + return sc.cycleCtx +} + +func (sc *StatsCoord) cancelSender() { + sc.cycleMu.Lock() + defer sc.cycleMu.Unlock() + if sc.cycleCancel != nil { + sc.cycleCancel() + sc.cycleCancel = nil + } +} + +func (sc *StatsCoord) getCycleWaiter() <-chan struct{} { + sc.cycleMu.Lock() + defer sc.cycleMu.Unlock() + return sc.cycleCtx.Done() +} + +func (sc *StatsCoord) runSender(ctx context.Context) (err error) { + sc.senderDone = make(chan struct{}) + defer func() { + close(sc.senderDone) + }() + for { + cycleCtx := sc.newCycle(ctx) + + sqlCtx, err := sc.ctxGen(cycleCtx) + if err != nil { + return err + } + + newStats, err := sc.newStatsForRoot(sqlCtx) + if err != nil { + sc.descError("", err) + } + + sc.statsMu.Lock() + sc.Stats = newStats + sc.statsMu.Unlock() + + select { + case <-cycleCtx.Done(): + return context.Cause(cycleCtx) + } + } +} + +func (sc *StatsCoord) newStatsForRoot(ctx *sql.Context) (map[tableIndexesKey][]*stats.Statistic, error) { + var err error + dSess := dsess.DSessFromSess(ctx.Session) + dbs := dSess.Provider().AllDatabases(ctx) + newStats := make(map[tableIndexesKey][]*stats.Statistic) + for _, db := range dbs { + sqlDb, ok := db.(sqle.Database) + if !ok { + continue + } + + var branches []ref.DoltRef + if err := sc.sq.DoSync(ctx, func() { + ddb, ok := dSess.GetDoltDB(ctx, db.Name()) + if !ok { + sc.descError("dolt database not found "+db.Name(), nil) + } + branches, err = ddb.GetBranches(ctx) + if err != nil { + sc.descError("getBranches", err) + } + }); err != nil { + return nil, err + } + + for _, br := range branches { + sqlDb, err := sqle.RevisionDbForBranch(ctx, db.(dsess.SqlDatabase), br.GetPath(), br.GetPath()+"/"+sqlDb.AliasedName()) + if err != nil { + sc.descError("revisionForBranch", err) + continue + } + + var tableNames []string + if err := sc.sq.DoSync(ctx, func() { + tableNames, err = sqlDb.GetTableNames(ctx) + if err != nil { + sc.descError("getTableNames", err) + } + }); err != nil { + return nil, err + } + + for _, tableName := range tableNames { + tableKey, newTableStats, err := sc.updateTable(ctx, tableName, sqlDb) + if err != nil { + return nil, err + } + newStats[tableKey] = newTableStats + } + } + } + + return newStats, nil +} + +func (sc *StatsCoord) finalizeHistogram(template stats.Statistic, buckets []*stats.Bucket, firstBound sql.Row) *stats.Statistic { + template.LowerBnd = firstBound + for _, b := range buckets { + // accumulate counts + template.RowCnt += b.RowCnt + template.DistinctCnt += b.DistinctCnt + template.NullCnt += b.NullCnt + template.Hist = append(template.Hist, b) + } + return &template +} + +func (sc *StatsCoord) collectIndexNodes(ctx *sql.Context, prollyMap prolly.Map, idxLen int, nodes []tree.Node) ([]*stats.Bucket, sql.Row, error) { + updater := newBucketBuilder(sql.StatQualifier{}, idxLen, prollyMap.KeyDesc()) + keyBuilder := val.NewTupleBuilder(prollyMap.KeyDesc().PrefixDesc(idxLen)) + + firstNodeHash := nodes[0].HashOf() + lowerBound, ok := sc.kv.GetBound(firstNodeHash, idxLen) + if !ok { + sc.sq.DoSync(ctx, func() { + var err error + lowerBound, err = firstRowForIndex(ctx, prollyMap, keyBuilder) + if err != nil { + sc.descError("get histogram bucket for node", err) + } + if sc.Debug { + log.Printf("put bound: %s: %v\n", firstNodeHash.String()[:5], lowerBound) + } + + sc.kv.PutBound(firstNodeHash, lowerBound, idxLen) + }) + } + + var offset uint64 + var buckets []*stats.Bucket + for _, n := range nodes { + if _, ok, err := sc.kv.GetBucket(ctx, n.HashOf(), keyBuilder); err != nil { + return nil, nil, err + } else if ok { + continue + } + + treeCnt, err := n.TreeCount() + if err != nil { + return nil, nil, err + } + + err = sc.sq.DoSync(ctx, func() { + updater.newBucket() + + // we read exclusive range [node first key, next node first key) + start, stop := offset, offset+uint64(treeCnt) + iter, err := prollyMap.IterOrdinalRange(ctx, start, stop) + if err != nil { + sc.descError("get histogram bucket for node", err) + return + } + for { + // stats key will be a prefix of the index key + keyBytes, _, err := iter.Next(ctx) + if errors.Is(err, io.EOF) { + break + } else if err != nil { + sc.descError("get histogram bucket for node", err) + return + } + // build full key + for i := range keyBuilder.Desc.Types { + keyBuilder.PutRaw(i, keyBytes.GetField(i)) + } + + updater.add(keyBuilder.BuildPrefixNoRecycle(prollyMap.Pool(), updater.prefixLen)) + keyBuilder.Recycle() + } + + // finalize the aggregation + newBucket, err := updater.finalize(ctx, prollyMap.NodeStore()) + if err != nil { + sc.descError("get histogram bucket for node", err) + return + } + err = sc.kv.PutBucket(ctx, n.HashOf(), newBucket, keyBuilder) + if err != nil { + sc.descError("get histogram bucket for node", err) + return + } + buckets = append(buckets, newBucket) + }) + if err != nil { + return nil, nil, err + } + offset += uint64(treeCnt) + } + + return buckets, lowerBound, nil +} + +func (sc *StatsCoord) updateTable(ctx *sql.Context, tableName string, sqlDb dsess.SqlDatabase) (tableIndexesKey, []*stats.Statistic, error) { + var err error + var sqlTable *sqle.DoltTable + var dTab *doltdb.Table + if err := sc.sq.DoSync(ctx, func() { + sqlTable, dTab, err = GetLatestTable(ctx, tableName, sqlDb) + if err != nil { + sc.descError("GetLatestTable", err) + } + }); err != nil { + return tableIndexesKey{}, nil, err + } + + tableKey := tableIndexesKey{ + db: sqlDb.AliasedName(), + branch: sqlDb.Revision(), + table: tableName, + schema: "", + } + + var indexes []sql.Index + if err := sc.sq.DoSync(ctx, func() { + indexes, err = sqlTable.GetIndexes(ctx) + if err != nil { + sc.descError("", err) + } + }); err != nil { + return tableIndexesKey{}, nil, err + } + + var newTableStats []*stats.Statistic + for _, sqlIdx := range indexes { + var idx durable.Index + var err error + if strings.EqualFold(sqlIdx.ID(), "PRIMARY") { + idx, err = dTab.GetRowData(ctx) + } else { + idx, err = dTab.GetIndexRowData(ctx, sqlIdx.ID()) + } + if err != nil { + sc.descError("GetRowData", err) + continue + } + + var template stats.Statistic + if err := sc.sq.DoSync(ctx, func() { + _, template, err = sc.getTemplate(ctx, sqlTable, sqlIdx) + if err != nil { + sc.descError("", fmt.Errorf("stats collection failed to generate a statistic template: %s.%s.%s:%T; %s", sqlDb.RevisionQualifiedName(), tableName, sqlIdx, sqlIdx, err)) + } + }); err != nil { + return tableIndexesKey{}, nil, err + } else if template.Fds.Empty() { + return tableIndexesKey{}, nil, fmt.Errorf("failed to creat template for %s/%s/%s/%s", sqlDb.Revision(), sqlDb.AliasedName(), tableName, sqlIdx.ID()) + } + + idxLen := len(sqlIdx.Expressions()) + + prollyMap := durable.ProllyMapFromIndex(idx) + var levelNodes []tree.Node + if err := sc.sq.DoSync(ctx, func() { + levelNodes, err = tree.GetHistogramLevel(ctx, prollyMap.Tuples(), bucketLowCnt) + if err != nil { + sc.descError("", err) + } + return + }); err != nil { + return tableIndexesKey{}, nil, err + } + var buckets []*stats.Bucket + var firstBound sql.Row + if len(levelNodes) > 0 { + buckets, firstBound, err = sc.collectIndexNodes(ctx, prollyMap, idxLen, levelNodes) + if err != nil { + sc.descError("", err) + continue + } + } + newTableStats = append(newTableStats, sc.finalizeHistogram(template, buckets, firstBound)) + } + return tableKey, newTableStats, nil +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv.go b/go/libraries/doltcore/sqle/statspro/stats_kv.go new file mode 100644 index 00000000000..b24492597d3 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/stats_kv.go @@ -0,0 +1,556 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "encoding/binary" + "errors" + "fmt" + "strconv" + "strings" + "sync" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/dolthub/go-mysql-server/sql/types" + + "github.com/dolthub/dolt/go/libraries/doltcore/schema" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/val" +) + +var ErrIncompatibleVersion = errors.New("client stats version mismatch") + +const defaultBucketSize = 1024 // must be > 0 to avoid panic + +type StatsKv interface { + PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error + GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) + GetTemplate(key templateCacheKey) (stats.Statistic, bool) + PutTemplate(key templateCacheKey, stat stats.Statistic) + GetBound(h hash.Hash, len int) (sql.Row, bool) + PutBound(h hash.Hash, r sql.Row, l int) + Flush(ctx context.Context) (int, error) + StartGc(ctx context.Context, sz int) error + MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error + FinishGc(context.Context) error + Len() int +} + +var _ StatsKv = (*prollyStats)(nil) +var _ StatsKv = (*memStats)(nil) + +func NewMemStats() *memStats { + return &memStats{ + mu: sync.Mutex{}, + buckets: make(map[bucketKey]*stats.Bucket), + templates: make(map[templateCacheKey]stats.Statistic), + bounds: make(map[bucketKey]sql.Row), + } +} + +type memStats struct { + mu sync.Mutex + doGc bool + + //buckets *lru.Cache[bucketKey, *stats.Bucket] + //nextBuckets *lru.Cache[bucketKey, *stats.Bucket] + buckets map[bucketKey]*stats.Bucket + nextBuckets map[bucketKey]*stats.Bucket + + templates map[templateCacheKey]stats.Statistic + nextTemplates map[templateCacheKey]stats.Statistic + + bounds map[bucketKey]sql.Row + nextBounds map[bucketKey]sql.Row + + epochCnt int +} + +func (m *memStats) StorageCnt(context.Context) (int, error) { + return 0, nil +} + +func (m *memStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) { + m.mu.Lock() + defer m.mu.Unlock() + t, ok := m.templates[key] + if !ok { + return stats.Statistic{}, false + } + if m.doGc { + m.nextTemplates[key] = t + } + return t, true +} + +func (m *memStats) PutTemplate(key templateCacheKey, stat stats.Statistic) { + m.mu.Lock() + defer m.mu.Unlock() + m.templates[key] = stat + if m.doGc { + m.nextTemplates[key] = stat + } +} + +type bucketKey [22]byte + +func getBucketKey(h hash.Hash, l int) bucketKey { + var k bucketKey + copy(k[:hash.ByteLen], h[:]) + binary.BigEndian.PutUint16(k[hash.ByteLen:], uint16(l)) + return k +} + +func (m *memStats) GetBound(h hash.Hash, l int) (sql.Row, bool) { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, l) + r, ok := m.bounds[k] + if !ok { + return nil, false + } + if m.doGc { + m.nextBounds[k] = r + } + return r, true +} + +func (m *memStats) PutBound(h hash.Hash, r sql.Row, l int) { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, l) + m.bounds[k] = r + if m.doGc { + m.nextBounds[k] = r + } +} + +func (m *memStats) StartGc(ctx context.Context, sz int) error { + m.mu.Lock() + defer m.mu.Unlock() + m.doGc = true + if sz == 0 { + sz = len(m.buckets) * 2 + } + var err error + //m.nextBuckets, err = lru.New[bucketKey, *stats.Bucket](sz) + m.nextBuckets = make(map[bucketKey]*stats.Bucket, sz) + if err != nil { + return err + } + m.nextBounds = make(map[bucketKey]sql.Row) + m.nextTemplates = make(map[templateCacheKey]stats.Statistic) + return nil +} + +func (m *memStats) RestartEpoch() { + m.mu.Lock() + defer m.mu.Unlock() + m.epochCnt = 0 +} + +func (m *memStats) FinishGc(context.Context) error { + m.mu.Lock() + defer m.mu.Unlock() + m.buckets = m.nextBuckets + m.templates = m.nextTemplates + m.bounds = m.nextBounds + m.nextBuckets = nil + m.nextTemplates = nil + m.nextBounds = nil + m.doGc = false + return nil +} + +func (m *memStats) Len() int { + m.mu.Lock() + defer m.mu.Unlock() + return len(m.buckets) +} + +func (m *memStats) PutBucket(_ context.Context, h hash.Hash, b *stats.Bucket, _ *val.TupleBuilder) error { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, len(b.BoundVal)) + m.buckets[k] = b + return nil +} + +func (m *memStats) MarkBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) error { + m.mu.Lock() + defer m.mu.Unlock() + k := getBucketKey(h, tupB.Desc.Count()) + b, ok := m.buckets[k] + if ok { + m.nextBuckets[k] = b + } + return nil +} + +func (m *memStats) GetBucket(_ context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) { + m.mu.Lock() + defer m.mu.Unlock() + if h.IsEmpty() { + return nil, false, nil + } + k := getBucketKey(h, tupB.Desc.Count()) + b, ok := m.buckets[k] + return b, ok, nil +} + +func (m *memStats) Flush(_ context.Context) (int, error) { + return 0, nil +} + +func NewProllyStats(ctx context.Context, destDb dsess.SqlDatabase) (*prollyStats, error) { + sch := schema.StatsTableDoltSchema + kd, vd := sch.GetMapDescriptors() + + keyBuilder := val.NewTupleBuilder(kd) + valueBuilder := val.NewTupleBuilder(vd) + newMap, err := prolly.NewMapFromTuples(ctx, destDb.DbData().Ddb.NodeStore(), kd, vd) + if err != nil { + return nil, err + } + + return &prollyStats{ + mu: sync.Mutex{}, + destDb: destDb, + kb: keyBuilder, + vb: valueBuilder, + m: newMap.Mutate(), + mem: NewMemStats(), + }, nil +} + +type prollyStats struct { + mu sync.Mutex + destDb dsess.SqlDatabase + kb, vb *val.TupleBuilder + m *prolly.MutableMap + newM *prolly.MutableMap + mem *memStats +} + +func (p *prollyStats) Len() int { + return p.mem.Len() +} + +func (p *prollyStats) GetTemplate(key templateCacheKey) (stats.Statistic, bool) { + return p.mem.GetTemplate(key) +} + +func (p *prollyStats) PutTemplate(key templateCacheKey, stat stats.Statistic) { + p.mem.PutTemplate(key, stat) +} + +func (p *prollyStats) GetBound(h hash.Hash, l int) (sql.Row, bool) { + return p.mem.GetBound(h, l) +} + +func (p *prollyStats) PutBound(h hash.Hash, r sql.Row, l int) { + p.mem.PutBound(h, r, l) +} + +func (p *prollyStats) PutBucket(ctx context.Context, h hash.Hash, b *stats.Bucket, tupB *val.TupleBuilder) error { + if err := p.mem.PutBucket(ctx, h, b, tupB); err != nil { + return err + } + + k, err := p.encodeHash(h, tupB.Desc.Count()) + if err != nil { + return err + } + v, err := p.encodeBucket(ctx, b, tupB) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + return p.m.Put(ctx, k, v) +} + +func (p *prollyStats) GetBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) (*stats.Bucket, bool, error) { + if h.IsEmpty() { + return nil, false, nil + } + b, ok, err := p.mem.GetBucket(ctx, h, tupB) + if err != nil { + return nil, false, err + } + if ok { + return b, true, nil + } + + // missing bucket and not GC'ing, try disk + k, err := p.encodeHash(h, tupB.Desc.Count()) + if err != nil { + return nil, false, err + } + + var v val.Tuple + err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error { + if key != nil { + ok = true + v = value + } + return nil + }) + if !ok || err != nil { + return nil, false, err + } + + if tupB == nil { + // still function if treating like memStats + return nil, true, nil + } + + b, err = p.decodeBucketTuple(ctx, v, tupB) + if err != nil { + return nil, false, err + } + + p.mem.PutBucket(ctx, h, b, tupB) + return b, true, nil +} + +func (p *prollyStats) Flush(ctx context.Context) (int, error) { + p.mu.Lock() + defer p.mu.Unlock() + + flushedMap, err := p.m.Map(ctx) + if err != nil { + return 0, err + } + if err := p.destDb.DbData().Ddb.SetStatistics(ctx, "main", flushedMap.HashOf()); err != nil { + return 0, err + } + + cnt, err := flushedMap.Count() + return cnt, err +} + +func (p *prollyStats) StartGc(ctx context.Context, sz int) error { + p.mu.Lock() + defer p.mu.Unlock() + if err := p.mem.StartGc(ctx, sz); err != nil { + return err + } + kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors() + newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd) + if err != nil { + return err + } + p.newM = newMap.Mutate() + + return nil +} + +func (p *prollyStats) MarkBucket(ctx context.Context, h hash.Hash, tupB *val.TupleBuilder) error { + p.mem.MarkBucket(ctx, h, tupB) + + // try disk + k, err := p.encodeHash(h, tupB.Desc.Count()) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + + var v val.Tuple + var ok bool + err = p.m.Get(ctx, k, func(key val.Tuple, value val.Tuple) error { + if key != nil { + ok = true + v = value + } + return nil + }) + if err != nil { + return err + } + if !ok { + return nil + } + + return p.newM.Put(ctx, k, v) +} + +func (p *prollyStats) FinishGc(context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + p.mem.FinishGc(nil) + m, err := p.newM.Map(context.Background()) + if err != nil { + return err + } + p.m = m.Mutate() + p.newM = nil + + return nil +} + +func (p *prollyStats) encodeHash(h hash.Hash, len int) (val.Tuple, error) { + p.mu.Lock() + defer p.mu.Unlock() + p.kb.PutInt64(0, int64(len)) + if err := p.kb.PutString(1, h.String()); err != nil { + return nil, err + } + return p.kb.Build(p.m.NodeStore().Pool()), nil +} + +func (p *prollyStats) decodeHashTuple(v val.Tuple) (int, hash.Hash, error) { + l, ok := p.kb.Desc.GetInt64(0, v) + hStr, ok := p.kb.Desc.GetString(1, v) + if !ok { + return 0, hash.Hash{}, fmt.Errorf("unexpected null hash") + } + return int(l), hash.Parse(hStr), nil +} + +func (p *prollyStats) decodeBucketTuple(ctx context.Context, v val.Tuple, tupB *val.TupleBuilder) (*stats.Bucket, error) { + var row []interface{} + for i := 0; i < p.vb.Desc.Count(); i++ { + f, err := tree.GetField(ctx, p.vb.Desc, i, v, p.m.NodeStore()) + if err != nil { + return nil, err + } + row = append(row, f) + } + + version := row[0] + if version != schema.StatsVersion { + return nil, fmt.Errorf("%w: write version %d does not match read version %d", ErrIncompatibleVersion, version, schema.StatsVersion) + } + rowCount := row[1].(int64) + distinctCount := row[2].(int64) + nullCount := row[3].(int64) + boundRowStr := row[4].(string) + upperBoundCnt := row[5].(int64) + mcvCountsStr := row[10].(string) + + boundRow, err := DecodeRow(ctx, p.m.NodeStore(), boundRowStr, tupB) + if err != nil { + return nil, err + } + + var mcvCnts []uint64 + if len(mcvCountsStr) > 0 { + for _, c := range strings.Split(mcvCountsStr, ",") { + cnt, err := strconv.ParseInt(c, 10, 64) + if err != nil { + return nil, err + } + mcvCnts = append(mcvCnts, uint64(cnt)) + } + } + + mcvs := make([]sql.Row, 4) + for i, v := range row[6:10] { + if v != nil && v != "" { + row, err := DecodeRow(ctx, p.m.NodeStore(), v.(string), tupB) + if err != nil { + return nil, err + } + mcvs[i] = row + } + } + + return &stats.Bucket{ + RowCnt: uint64(rowCount), + DistinctCnt: uint64(distinctCount), + NullCnt: uint64(nullCount), + McvsCnt: mcvCnts, + BoundCnt: uint64(upperBoundCnt), + BoundVal: boundRow, + McvVals: mcvs, + }, nil +} + +var mcvTypes = []sql.Type{types.Int16, types.Int16, types.Int16, types.Int16} + +func (p *prollyStats) encodeBucket(ctx context.Context, b *stats.Bucket, tupB *val.TupleBuilder) (val.Tuple, error) { + p.mu.Lock() + defer p.mu.Unlock() + + p.vb.PutInt64(0, schema.StatsVersion) + p.vb.PutInt64(1, int64(b.RowCount())) + p.vb.PutInt64(2, int64(b.DistinctCount())) + p.vb.PutInt64(3, int64(b.NullCount())) + boundRow, err := EncodeRow(ctx, p.m.NodeStore(), b.UpperBound(), tupB) + if err != nil { + return nil, err + } + p.vb.PutString(4, string(boundRow)) + p.vb.PutInt64(5, int64(b.BoundCount())) + for i, r := range b.Mcvs() { + mcvRow, err := EncodeRow(ctx, p.m.NodeStore(), r, tupB) + if err != nil { + return nil, err + } + p.vb.PutString(6+i, string(mcvRow)) + } + var mcvCntsRow sql.Row + for _, v := range b.McvCounts() { + mcvCntsRow = append(mcvCntsRow, int(v)) + } + p.vb.PutString(10, stats.StringifyKey(mcvCntsRow, mcvTypes[:len(mcvCntsRow)])) + + return p.vb.Build(p.m.NodeStore().Pool()), nil +} + +func (p *prollyStats) NewEmpty(ctx context.Context) (StatsKv, error) { + kd, vd := schema.StatsTableDoltSchema.GetMapDescriptors() + newMap, err := prolly.NewMapFromTuples(ctx, p.destDb.DbData().Ddb.NodeStore(), kd, vd) + if err != nil { + return nil, err + } + m := newMap.Mutate() + return &prollyStats{m: m, destDb: p.destDb, kb: p.kb, vb: p.vb}, nil +} + +func EncodeRow(ctx context.Context, ns tree.NodeStore, r sql.Row, tb *val.TupleBuilder) ([]byte, error) { + for i := range tb.Desc.Count() { + v := r[i] + if v == nil { + continue + } + if err := tree.PutField(ctx, ns, tb, i, v); err != nil { + return nil, err + } + } + return tb.Build(ns.Pool()), nil +} + +func DecodeRow(ctx context.Context, ns tree.NodeStore, s string, tb *val.TupleBuilder) (sql.Row, error) { + tup := []byte(s) + r := make(sql.Row, tb.Desc.Count()) + var err error + for i, _ := range r { + r[i], err = tree.GetField(ctx, tb.Desc, i, tup, ns) + if err != nil { + return nil, err + } + } + return r, nil +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_kv_test.go b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go new file mode 100644 index 00000000000..94907998137 --- /dev/null +++ b/go/libraries/doltcore/sqle/statspro/stats_kv_test.go @@ -0,0 +1,231 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statspro + +import ( + "context" + "strconv" + "strings" + "testing" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/stats" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/val" +) + +func TestProllyKv(t *testing.T) { + threads := sql.NewBackgroundThreads() + prollyKv := newTestProllyKv(t, threads) + + h := hash.Parse(strings.Repeat("a", hash.StringLen)) + h2 := hash.Parse(strings.Repeat("b", hash.StringLen)) + k := getBucketKey(h, 2) + + tupB := val.NewTupleBuilder(val.NewTupleDescriptor( + val.Type{Enc: val.Int64Enc, Nullable: true}, + val.Type{Enc: val.StringEnc, Nullable: true}, + )) + + t.Run("test bounds", func(t *testing.T) { + exp := sql.Row{1, 1} + prollyKv.PutBound(h, exp, 2) + cmp, ok := prollyKv.GetBound(h, 2) + require.True(t, ok) + require.Equal(t, exp, cmp) + + _, ok = prollyKv.GetBound(h2, 2) + require.False(t, ok) + }) + + t.Run("test templates", func(t *testing.T) { + exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}} + key := templateCacheKey{ + h: h, + idxName: "PRIMARY", + } + prollyKv.PutTemplate(key, exp) + cmp, ok := prollyKv.GetTemplate(key) + require.True(t, ok) + require.Equal(t, exp, cmp) + + key2 := templateCacheKey{ + h: h2, + idxName: "PRIMARY", + } + _, ok = prollyKv.GetTemplate(key2) + require.False(t, ok) + }) + + t.Run("test buckets", func(t *testing.T) { + exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket) + err := prollyKv.PutBucket(context.Background(), h, exp, tupB) + require.NoError(t, err) + cmp, ok, err := prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp, cmp) + + _, ok, err = prollyKv.GetBucket(context.Background(), h2, tupB) + require.NoError(t, err) + require.False(t, ok) + + // delete from memory, should pull from disk when |tupB| supplied + delete(prollyKv.mem.buckets, k) + + cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp, cmp) + + cmp, ok, err = prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp.RowCnt, cmp.RowCnt) + require.Equal(t, exp.DistinctCnt, cmp.DistinctCnt) + require.Equal(t, exp.NullCnt, cmp.NullCnt) + require.Equal(t, exp.McvsCnt, cmp.McvsCnt) + require.Equal(t, exp.McvVals[0], cmp.McvVals[0]) + require.Equal(t, exp.McvVals[1], cmp.McvVals[1]) + require.Equal(t, exp.McvVals[2], cmp.McvVals[2]) + require.Equal(t, exp.McvVals[3], cmp.McvVals[3]) + require.Equal(t, exp.BoundVal, cmp.BoundVal) + require.Equal(t, exp.BoundCnt, cmp.BoundCnt) + }) + + t.Run("test bucket GC", func(t *testing.T) { + exp := stats.NewHistogramBucket(15, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket) + err := prollyKv.PutBucket(context.Background(), h, exp, tupB) + require.NoError(t, err) + + exp2 := stats.NewHistogramBucket(10, 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket) + err = prollyKv.PutBucket(context.Background(), h2, exp2, tupB) + require.NoError(t, err) + + prollyKv.StartGc(context.Background(), 10) + err = prollyKv.MarkBucket(context.Background(), h, tupB) + require.NoError(t, err) + err = prollyKv.MarkBucket(context.Background(), h2, tupB) + require.NoError(t, err) + + prollyKv.FinishGc(nil) + + m, _ := prollyKv.m.Map(context.Background()) + iter, _ := m.IterAll(context.Background()) + for i := range 2 { + k, _, err := iter.Next(context.Background()) + if i == 0 { + require.Equal(t, "( 2, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa )", prollyKv.kb.Desc.Format(k)) + } else if i == 1 { + require.Equal(t, "( 2, bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb )", prollyKv.kb.Desc.Format(k)) + } else if i == 2 { + require.Error(t, err) + } + } + + prollyKv.StartGc(context.Background(), 10) + err = prollyKv.MarkBucket(context.Background(), h2, tupB) + require.NoError(t, err) + prollyKv.FinishGc(nil) + + cmp2, ok, err := prollyKv.GetBucket(context.Background(), h2, tupB) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, exp2.BoundCount(), cmp2.BoundCnt) + // only tagged one bucket + require.Equal(t, 1, prollyKv.Len()) + }) + + t.Run("test overflow", func(t *testing.T) { + prollyKv.StartGc(context.Background(), 10) + prollyKv.FinishGc(nil) + + expLen := 2000 + var expected []hash.Hash + for i := range expLen { + exp := stats.NewHistogramBucket(uint64(i), 7, 3, 4, sql.Row{int64(1), "one"}, []uint64{5, 4, 3, 1}, []sql.Row{{int64(5), "six"}, {int64(4), "three"}, {int64(3), "seven"}, {int64(1), "one"}}).(*stats.Bucket) + nh := strconv.AppendInt(nil, int64(i), 10) + nh = append(nh, h[:hash.ByteLen-len(nh)]...) + newH := hash.New(nh) + expected = append(expected, newH) + err := prollyKv.PutBucket(context.Background(), newH, exp, tupB) + require.NoError(t, err) + } + + for _, h := range expected { + _, ok, err := prollyKv.GetBucket(context.Background(), h, tupB) + require.NoError(t, err) + require.True(t, ok) + } + + require.Equal(t, expLen, prollyKv.Len()) + }) + + t.Run("test bounds GC", func(t *testing.T) { + exp := sql.Row{1, 1} + prollyKv.PutBound(h, exp, 2) + prollyKv.PutBound(h2, exp, 2) + + prollyKv.StartGc(context.Background(), 10) + prollyKv.GetBound(h2, 2) + prollyKv.FinishGc(nil) + + require.Equal(t, 1, len(prollyKv.mem.bounds)) + }) + + t.Run("test templates GC", func(t *testing.T) { + exp := stats.Statistic{RowCnt: 50, Qual: sql.StatQualifier{Database: "mydb", Tab: "xy"}} + key := templateCacheKey{ + h: h, + idxName: "PRIMARY", + } + key2 := templateCacheKey{ + h: h2, + idxName: "PRIMARY", + } + prollyKv.PutTemplate(key, exp) + prollyKv.PutTemplate(key2, exp) + + prollyKv.StartGc(context.Background(), 10) + prollyKv.GetTemplate(key2) + prollyKv.FinishGc(nil) + + require.Equal(t, 1, len(prollyKv.mem.templates)) + }) + +} + +func newTestProllyKv(t *testing.T, threads *sql.BackgroundThreads) *prollyStats { + dEnv := dtestutils.CreateTestEnv() + + sqlEng, ctx := newTestEngine(context.Background(), dEnv, threads) + ctx.Session.SetClient(sql.Client{ + User: "billy boy", + Address: "bigbillie@fake.horse", + }) + require.NoError(t, executeQuery(ctx, sqlEng, "create database mydb")) + require.NoError(t, executeQuery(ctx, sqlEng, "use mydb")) + + startDbs := sqlEng.Analyzer.Catalog.DbProvider.AllDatabases(ctx) + + kv, err := NewProllyStats(ctx, startDbs[0].(dsess.SqlDatabase)) + require.NoError(t, err) + + return kv +} diff --git a/go/libraries/doltcore/sqle/statspro/stats_provider.go b/go/libraries/doltcore/sqle/statspro/stats_provider.go deleted file mode 100644 index 573e20b638a..00000000000 --- a/go/libraries/doltcore/sqle/statspro/stats_provider.go +++ /dev/null @@ -1,535 +0,0 @@ -// Copyright 2023 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package statspro - -import ( - "context" - "errors" - "fmt" - "path/filepath" - "strings" - "sync" - - "github.com/dolthub/go-mysql-server/sql" - - "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" - "github.com/dolthub/dolt/go/libraries/doltcore/env" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" - "github.com/dolthub/dolt/go/store/hash" - "github.com/dolthub/dolt/go/store/prolly/tree" -) - -var ErrFailedToLoad = errors.New("failed to load statistics") - -type indexMeta struct { - qual sql.StatQualifier - cols []string - newNodes []tree.Node - // updateOrdinals are [start, stop] tuples for each update chunk - updateOrdinals []updateOrdinal - keepChunks []sql.HistogramBucket - dropChunks []sql.HistogramBucket - allAddrs []hash.Hash -} - -type updateOrdinal struct { - start, stop uint64 -} - -func NewProvider(pro *sqle.DoltDatabaseProvider, sf StatsFactory) *Provider { - return &Provider{ - pro: pro, - sf: sf, - mu: &sync.Mutex{}, - statDbs: make(map[string]Database), - autoCtxCancelers: make(map[string]context.CancelFunc), - analyzeCtxCancelers: make(map[string]context.CancelFunc), - status: make(map[string]string), - lockedTables: make(map[string]bool), - } -} - -// Provider is the engine interface for reading and writing index statistics. -// Each database has its own statistics table that all tables/indexes in a db -// share. -type Provider struct { - mu *sync.Mutex - pro *sqle.DoltDatabaseProvider - sf StatsFactory - statDbs map[string]Database - autoCtxCancelers map[string]context.CancelFunc - analyzeCtxCancelers map[string]context.CancelFunc - starter sqle.InitDatabaseHook - status map[string]string - lockedTables map[string]bool -} - -// each database has one statistics table that is a collection of the -// table stats in the database -type dbToStats struct { - mu *sync.Mutex - dbName string - stats map[sql.StatQualifier]*DoltStats - statsDatabase Database - latestTableHashes map[string]hash.Hash -} - -func newDbStats(dbName string) *dbToStats { - return &dbToStats{ - mu: &sync.Mutex{}, - dbName: dbName, - stats: make(map[sql.StatQualifier]*DoltStats), - latestTableHashes: make(map[string]hash.Hash), - } -} - -var _ sql.StatsProvider = (*Provider)(nil) - -func (p *Provider) Close() error { - var lastErr error - for _, db := range p.statDbs { - if err := db.Close(); err != nil { - lastErr = err - } - } - return lastErr -} - -func (p *Provider) TryLockForUpdate(branch, db, table string) bool { - p.mu.Lock() - defer p.mu.Unlock() - lockId := fmt.Sprintf("%s.%s.%s", branch, db, table) - if ok := p.lockedTables[lockId]; ok { - return false - } - p.lockedTables[lockId] = true - return true -} - -func (p *Provider) UnlockTable(branch, db, table string) { - p.mu.Lock() - defer p.mu.Unlock() - lockId := fmt.Sprintf("%s.%s.%s", branch, db, table) - p.lockedTables[lockId] = false - return -} - -func (p *Provider) StartRefreshThread(ctx *sql.Context, pro dsess.DoltDatabaseProvider, name string, env *env.DoltEnv, db dsess.SqlDatabase) error { - err := p.starter(ctx, pro.(*sqle.DoltDatabaseProvider), name, env, db) - - if err != nil { - p.UpdateStatus(name, fmt.Sprintf("error restarting thread %s: %s", name, err.Error())) - return err - } - p.UpdateStatus(name, fmt.Sprintf("restarted thread: %s", name)) - return nil -} - -func (p *Provider) SetStarter(hook sqle.InitDatabaseHook) { - p.starter = hook -} - -func (p *Provider) CancelRefreshThread(dbName string) { - p.mu.Lock() - if cancel, ok := p.autoCtxCancelers[dbName]; ok { - cancel() - } - p.mu.Unlock() - p.UpdateStatus(dbName, fmt.Sprintf("cancelled thread: %s", dbName)) - -} - -func (p *Provider) ThreadStatus(dbName string) string { - p.mu.Lock() - defer p.mu.Unlock() - - if msg, ok := p.status[dbName]; ok { - return msg - } - return "no active stats thread" -} - -func (p *Provider) TrackedBranches(dbName string) []string { - db, ok := p.getStatDb(dbName) - if !ok { - return nil - } - return db.Branches() - -} - -func (p *Provider) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil, nil - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - return p.GetTableDoltStats(ctx, branch, db, schemaName, table.Name()) -} - -func (p *Provider) GetTableDoltStats(ctx *sql.Context, branch, db, schema, table string) ([]sql.Statistic, error) { - statDb, ok := p.getStatDb(db) - if !ok || statDb == nil { - return nil, nil - } - - if branch == "" { - dSess := dsess.DSessFromSess(ctx.Session) - var err error - branch, err = dSess.GetBranch() - if err != nil { - return nil, nil - } - } - - var ret []sql.Statistic - for _, qual := range statDb.ListStatQuals(branch) { - if strings.EqualFold(db, qual.Database) && strings.EqualFold(schema, qual.Sch) && strings.EqualFold(table, qual.Tab) { - stat, _ := statDb.GetStat(branch, qual) - ret = append(ret, stat) - } - } - - return ret, nil -} - -func (p *Provider) setStatDb(name string, db Database) { - p.mu.Lock() - defer p.mu.Unlock() - p.statDbs[name] = db -} - -func (p *Provider) getStatDb(name string) (Database, bool) { - p.mu.Lock() - defer p.mu.Unlock() - statDb, ok := p.statDbs[strings.ToLower(name)] - return statDb, ok -} - -func (p *Provider) deleteStatDb(name string) { - p.mu.Lock() - defer p.mu.Unlock() - delete(p.statDbs, strings.ToLower(name)) -} - -func (p *Provider) SetStats(ctx *sql.Context, s sql.Statistic) error { - statDb, ok := p.getStatDb(s.Qualifier().Db()) - if !ok { - return nil - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil - } - - doltStat, err := DoltStatsFromSql(s) - if err != nil { - return err - } - - p.UpdateStatus(s.Qualifier().Db(), fmt.Sprintf("refreshed %s", s.Qualifier().Db())) - - return statDb.SetStat(ctx, branch, s.Qualifier(), doltStat) -} - -func (p *Provider) getQualStats(ctx *sql.Context, qual sql.StatQualifier) (*DoltStats, bool) { - statDb, ok := p.getStatDb(qual.Db()) - if !ok { - return nil, false - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil, false - } - - return statDb.GetStat(branch, qual) -} - -func (p *Provider) GetStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) (sql.Statistic, bool) { - stat, ok := p.getQualStats(ctx, qual) - if !ok { - return nil, false - } - return stat, true -} - -func (p *Provider) DropBranchDbStats(ctx *sql.Context, branch, db string, flush bool) error { - statDb, ok := p.getStatDb(db) - if !ok { - return nil - } - - p.mu.Lock() - defer p.mu.Unlock() - - p.status[db] = "dropped" - - return statDb.DeleteBranchStats(ctx, branch, flush) -} - -func (p *Provider) DropDbStats(ctx *sql.Context, db string, flush bool) error { - statDb, ok := p.getStatDb(db) - if !ok { - return nil - } - for _, branch := range statDb.Branches() { - // remove provider access - p.DropBranchDbStats(ctx, branch, db, flush) - } - - if flush { - p.deleteStatDb(db) - } - - return nil -} - -func (p *Provider) DropStats(ctx *sql.Context, qual sql.StatQualifier, _ []string) error { - statDb, ok := p.getStatDb(qual.Db()) - if !ok { - return nil - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return nil - } - - if _, ok := statDb.GetStat(branch, qual); ok { - statDb.DeleteStats(ctx, branch, qual) - p.UpdateStatus(qual.Db(), fmt.Sprintf("dropped statisic: %s", qual.String())) - } - - return nil -} - -func (p *Provider) UpdateStatus(db string, msg string) { - p.mu.Lock() - defer p.mu.Unlock() - - p.status[db] = msg -} - -func (p *Provider) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - statDb, ok := p.getStatDb(db) - if !ok { - return 0, sql.ErrDatabaseNotFound.New(db) - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return 0, err - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary")) - if !ok { - return 0, nil - } - - return priStats.RowCount(), nil -} - -func (p *Provider) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - statDb, ok := p.getStatDb(db) - if !ok { - return 0, sql.ErrDatabaseNotFound.New(db) - } - - dSess := dsess.DSessFromSess(ctx.Session) - branch, err := dSess.GetBranch() - if err != nil { - return 0, err - } - - var schemaName string - if schTab, ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) - } - - priStats, ok := statDb.GetStat(branch, sql.NewStatQualifier(db, schemaName, table.Name(), "primary")) - if !ok { - return 0, nil - } - - return priStats.AvgSize(), nil -} - -func (p *Provider) Prune(ctx *sql.Context) error { - dSess := dsess.DSessFromSess(ctx.Session) - - for _, sqlDb := range p.pro.DoltDatabases() { - dbName := strings.ToLower(sqlDb.Name()) - sqlDb, ok, err := dSess.Provider().SessionDatabase(ctx, dbName) - if err != nil { - return err - } - if !ok { - continue - } - statDb, ok := p.getStatDb(dbName) - if !ok { - continue - } - - // Canceling refresh thread prevents background thread from - // making progress. Prune should succeed. - p.CancelRefreshThread(dbName) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - for _, branch := range statDb.Branches() { - err := func() error { - // function closure ensures safe defers - var stats []sql.Statistic - for _, t := range tables { - // XXX: avoid races with ANALYZE with the table locks. - // Either concurrent purge or analyze (or both) will fail. - if !p.TryLockForUpdate(branch, dbName, t) { - p.mu.Lock() - fmt.Println(p.lockedTables) - p.mu.Unlock() - return fmt.Errorf("concurrent statistics update and prune; retry prune when update is finished") - } - defer p.UnlockTable(branch, dbName, t) - - tableStats, err := p.GetTableDoltStats(ctx, branch, dbName, sqlDb.SchemaName(), t) - if err != nil { - return err - } - stats = append(stats, tableStats...) - } - - if err := p.DropBranchDbStats(ctx, branch, dbName, true); err != nil { - return err - } - - for _, s := range stats { - ds, ok := s.(*DoltStats) - if !ok { - return fmt.Errorf("unexpected statistics type found: %T", s) - } - if err := statDb.SetStat(ctx, branch, ds.Qualifier(), ds); err != nil { - return err - } - } - if err := statDb.Flush(ctx, branch); err != nil { - return err - } - return nil - }() - if err != nil { - return err - } - } - } - return nil -} - -func (p *Provider) Purge(ctx *sql.Context) error { - for _, sqlDb := range p.pro.DoltDatabases() { - dbName := strings.ToLower(sqlDb.Name()) - - tables, err := sqlDb.GetTableNames(ctx) - if err != nil { - return err - } - - var branches []string - db, ok := p.getStatDb(dbName) - if ok { - // Canceling refresh thread prevents background thread from - // making progress. Purge should succeed. - p.CancelRefreshThread(dbName) - - branches = db.Branches() - for _, branch := range branches { - err := func() error { - for _, t := range tables { - // XXX: avoid races with ANALYZE with the table locks. - // Either concurrent purge or analyze (or both) will fail. - if !p.TryLockForUpdate(branch, dbName, t) { - return fmt.Errorf("concurrent statistics update and prune; retry purge when update is finished") - } - defer p.UnlockTable(branch, dbName, t) - } - - err := p.DropBranchDbStats(ctx, branch, dbName, true) - if err != nil { - return fmt.Errorf("failed to drop stats: %w", err) - } - return nil - }() - if err != nil { - return err - } - } - } - - // if the database's failed to load, we still want to delete the folder - - fs, err := p.pro.FileSystemForDatabase(dbName) - if err != nil { - return err - } - - //remove from filesystem - statsFs, err := fs.WithWorkingDir(dbfactory.DoltStatsDir) - if err != nil { - return err - } - - if ok, _ := statsFs.Exists(""); ok { - if err := statsFs.Delete("", true); err != nil { - return err - } - } - - dropDbLoc, err := statsFs.Abs("") - if err != nil { - return err - } - - if err = dbfactory.DeleteFromSingletonCache(filepath.ToSlash(dropDbLoc + "/.dolt/noms")); err != nil { - return err - } - if len(branches) == 0 { - // if stats db was invalid on startup, recreate from baseline - branches = p.getStatsBranches(ctx) - } - p.Load(ctx, fs, sqlDb, branches) - } - return nil -} diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go index 99e6c2f5a9b..6bccab80727 100644 --- a/go/libraries/doltcore/sqle/system_variables.go +++ b/go/libraries/doltcore/sqle/system_variables.go @@ -16,6 +16,7 @@ package sqle import ( "math" + "time" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" @@ -219,39 +220,39 @@ var DoltSystemVariables = []sql.SystemVariable{ Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshEnabled, + Name: dsess.DoltStatsEnabled, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsEnabled), + Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsBootstrapEnabled, + Name: dsess.DoltStatsMemoryOnly, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled), + Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), Default: int8(0), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsMemoryOnly, + Name: dsess.DoltStatsJobInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), - Default: int8(0), + Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false), + Default: int64(500 * time.Millisecond / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshThreshold, + Name: dsess.DoltStatsBranchInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10), - Default: float64(.5), + Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshInterval, + Name: dsess.DoltStatsGCInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false), - Default: 600, + Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsBranches, @@ -446,39 +447,39 @@ func AddDoltSystemVariables() { Default: int8(0), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshEnabled, + Name: dsess.DoltStatsEnabled, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsAutoRefreshEnabled), - Default: int8(0), + Type: types.NewSystemBoolType(dsess.DoltStatsEnabled), + Default: int8(1), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsBootstrapEnabled, + Name: dsess.DoltStatsGCInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsBootstrapEnabled), - Default: int8(0), + Type: types.NewSystemIntType(dsess.DoltStatsGCInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsMemoryOnly, + Name: dsess.DoltStatsJobInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), - Default: int8(0), + Type: types.NewSystemIntType(dsess.DoltStatsJobInterval, 0, math.MaxInt, false), + Default: int64(500 * time.Millisecond / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshThreshold, + Name: dsess.DoltStatsBranchInterval, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemDoubleType(dsess.DoltStatsAutoRefreshThreshold, 0, 10), - Default: float64(.5), + Type: types.NewSystemIntType(dsess.DoltStatsBranchInterval, 0, math.MaxInt, false), + Default: int64(time.Hour / time.Millisecond), }, &sql.MysqlSystemVariable{ - Name: dsess.DoltStatsAutoRefreshInterval, + Name: dsess.DoltStatsMemoryOnly, Dynamic: true, Scope: sql.GetMysqlScope(sql.SystemVariableScope_Global), - Type: types.NewSystemIntType(dsess.DoltStatsAutoRefreshInterval, 0, math.MaxInt, false), - Default: 120, + Type: types.NewSystemBoolType(dsess.DoltStatsMemoryOnly), + Default: int8(0), }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsBranches, diff --git a/go/libraries/doltcore/sqle/tables.go b/go/libraries/doltcore/sqle/tables.go index e8fb46ea5d1..06765360bff 100644 --- a/go/libraries/doltcore/sqle/tables.go +++ b/go/libraries/doltcore/sqle/tables.go @@ -127,12 +127,12 @@ func (t *DoltTable) LookupForExpressions(ctx *sql.Context, exprs ...sql.Expressi return sql.IndexLookup{}, nil, nil, false, nil } - dbState, ok, err := sess.LookupDbState(ctx, t.db.Name()) + dbState, ok, err := sess.LookupDbState(ctx, t.db.AliasedName()) if err != nil { return sql.IndexLookup{}, nil, nil, false, nil } if !ok { - return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.Name()) + return sql.IndexLookup{}, nil, nil, false, fmt.Errorf("no state for database %s", t.db.AliasedName()) } var lookupCols []expression.LookupColumn diff --git a/go/libraries/doltcore/sqle/user_space_database.go b/go/libraries/doltcore/sqle/user_space_database.go index e54c03b7eb3..c3689e13a61 100644 --- a/go/libraries/doltcore/sqle/user_space_database.go +++ b/go/libraries/doltcore/sqle/user_space_database.go @@ -141,6 +141,10 @@ func (db *UserSpaceDatabase) RequestedName() string { return db.Name() } +func (db *UserSpaceDatabase) AliasedName() string { + return db.Name() +} + func (db *UserSpaceDatabase) GetSchema(ctx *sql.Context, schemaName string) (sql.DatabaseSchema, bool, error) { panic(fmt.Sprintf("GetSchema is not implemented for database %T", db)) } diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff.go b/go/libraries/utils/circular/buff.go similarity index 90% rename from go/libraries/doltcore/remotestorage/internal/circular/buff.go rename to go/libraries/utils/circular/buff.go index 2a5ba8866d1..36632a88085 100644 --- a/go/libraries/doltcore/remotestorage/internal/circular/buff.go +++ b/go/libraries/utils/circular/buff.go @@ -34,12 +34,20 @@ func (b *Buff[T]) Len() int { return b.len } +func (b *Buff[T]) Cap() int { + return cap(b.arr) +} + func (b *Buff[T]) At(i int) T { + return *b.at(i) +} + +func (b *Buff[T]) at(i int) *T { if i >= b.Len() { panic("At on Buff too small") } j := (b.front + i) % len(b.arr) - return b.arr[j] + return &b.arr[j] } func (b *Buff[T]) Front() T { @@ -50,6 +58,9 @@ func (b *Buff[T]) Pop() { if b.Len() == 0 { panic("Pop empty Buff") } + // Don't leak entries... + var empty T + *b.at(0) = empty b.front = (b.front + 1) % len(b.arr) b.len -= 1 } diff --git a/go/libraries/doltcore/remotestorage/internal/circular/buff_test.go b/go/libraries/utils/circular/buff_test.go similarity index 100% rename from go/libraries/doltcore/remotestorage/internal/circular/buff_test.go rename to go/libraries/utils/circular/buff_test.go diff --git a/go/performance/scripts/dg_sysbench.sh b/go/performance/scripts/dg_sysbench.sh new file mode 100755 index 00000000000..0ce8ca1927a --- /dev/null +++ b/go/performance/scripts/dg_sysbench.sh @@ -0,0 +1,145 @@ +#!/bin/bash +set -e +set -o pipefail + +SYSBENCH_TEST="oltp_insert_only" +WORKING_DIR=`mktemp -d` +PPROF=0 +PORT=5433 + +# parse options +# superuser.com/questions/186272/ +while test $# -gt 0 +do + case "$1" in + + --new-new) export DOLT_DEFAULT_BIN_FORMAT="__DOLT__" && + export ENABLE_ROW_ITER_2=true + ;; + + --no-exchange) export SINGLE_THREAD_FEATURE_FLAG=true + ;; + + # benchmark with pprof profiling + --pprof) PPROF=1 + ;; + + # run dolt single threaded + --single) export GOMAXPROCS=1 + ;; + + --row2) export ENABLE_ROW_ITER_2=true + ;; + + --journal) export DOLT_ENABLE_CHUNK_JOURNAL=true + ;; + + # specify sysbench benchmark + *) SYSBENCH_TEST="$1" + ;; + + esac + shift +done + +if [ ! -d "./sysbench-lua-scripts" ]; then + git clone https://github.com/dolthub/sysbench-lua-scripts.git +fi + +# collect custom sysbench scripts +cp ./sysbench-lua-scripts/*.lua "$WORKING_DIR" +cd "$WORKING_DIR" + +# make a sql-server config file +cat < dolt-config.yaml +log_level: "info" + +behavior: + read_only: false + +user: + name: "user" + password: "pass" + +listener: + host: "0.0.0.0" + port: $PORT + read_timeout_millis: 28800000 + write_timeout_millis: 28800000 + +data_dir: . +YAML + +# start a server +mkdir sbtest +cd sbtest +doltgres -config="../dolt-config.yaml" 2> prepare.log & +SERVER_PID="$!" + +set -x + +sleep 1 + +ps aux | grep "doltgres" +lsof -iTCP -sTCP:LISTEN +echo $SERVER_PID +psql --port $PORT --host=0.0.0.0 --db=doltgres -c "create database sbtest" + + +# stop it if it crashes +cleanup() { + kill -15 "$SERVER_PID" +} +trap cleanup EXIT + +# setup benchmark +echo "benchmark $SYSBENCH_TEST bootstrapping at $WORKING_DIR" + + +sysbench \ + --db-driver="pgsql" \ + --pgsql-host="0.0.0.0" \ + --pgsql-port="$PORT" \ + --pgsql-user="user" \ + --pgsql-password="pass" \ + "$SYSBENCH_TEST" prepare + +# restart server to isolate bench run +kill -15 "$SERVER_PID" + +# maybe run with pprof +if [ "$PPROF" -eq 1 ]; then + doltgres --prof cpu -config="../dolt-config.yaml" 2> run.log & +else + doltgres -config="../dolt-config.yaml" 2> run.log & +fi +SERVER_PID="$!" +sleep 1 + + +# run benchmark +echo "benchmark $SYSBENCH_TEST starting at $WORKING_DIR" + +sysbench \ + --db-driver="pgsql" \ + --pgsql-host="0.0.0.0" \ + --pgsql-port="$PORT" \ + --pgsql-user="user" \ + --pgsql-password="pass" \ + --db-ps-mode=disable \ + --time=30 \ + --db-ps-mode=disable \ + "$SYSBENCH_TEST" run + +unset DOLT_ENABLE_CHUNK_JOURNAL +unset DOLT_DEFAULT_BIN_FORMAT +unset ENABLE_ROW_ITER_2 +unset SINGLE_THREAD_FEATURE_FLAG +unset GOMAXPROCS + +echo "benchmark $SYSBENCH_TEST complete at $WORKING_DIR" +if [ "$PPROF" -eq 1 ]; then + # parse run.log to output the profile location + head -n1 "$WORKING_DIR/run.log" | cut -d ":" -f 4 +fi +echo "" diff --git a/go/performance/utils/benchmark_runner/sysbench.go b/go/performance/utils/benchmark_runner/sysbench.go index 5953368b5b2..02e637b4920 100644 --- a/go/performance/utils/benchmark_runner/sysbench.go +++ b/go/performance/utils/benchmark_runner/sysbench.go @@ -21,9 +21,6 @@ import ( "os/exec" "path/filepath" "strings" - "time" - - "github.com/jmoiron/sqlx" "github.com/google/uuid" ) @@ -149,10 +146,6 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) { return nil, err } - if err := t.collectStats(ctx); err != nil { - return nil, err - } - fmt.Println("Running test", t.test.GetName()) rs, err := t.run(ctx) @@ -162,76 +155,3 @@ func (t *sysbenchTesterImpl) Test(ctx context.Context) (*Result, error) { return rs, nil } - -func (t *sysbenchTesterImpl) collectStats(ctx context.Context) error { - if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") { - db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/test", t.serverConfig.GetHost(), t.serverConfig.GetPort())) - if err != nil { - return err - } - return collectStats(ctx, db) - } - return nil -} - -func collectStats(ctx context.Context, db *sqlx.DB) error { - c, err := db.Connx(ctx) - if err != nil { - return err - } - - { - // configuration, restart, and check needs to be in the same session - tx, err := c.BeginTxx(ctx, nil) - if err != nil { - return err - } - - if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_enabled = 1;"); err != nil { - return err - } - if _, err := tx.Exec("set @@GLOBAL.dolt_stats_auto_refresh_interval = 0;"); err != nil { - return err - } - if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_interval = 0;"); err != nil { - return err - } - if _, err := tx.Exec("set @@PERSIST.dolt_stats_auto_refresh_enabled = 1;"); err != nil { - return err - } - if _, err := tx.Exec("call dolt_stats_restart();"); err != nil { - return err - } - - rows := map[string]interface{}{"cnt": 0} - tick := time.NewTicker(5 * time.Second) - for { - if rows["cnt"] != 0 { - fmt.Printf("collected %d histogram buckets\n", rows["cnt"]) - break - } - select { - case <-tick.C: - res, err := tx.Queryx("select count(*) as cnt from dolt_statistics;") - if err != nil { - return err - } - if !res.Next() { - return fmt.Errorf("failed to set statistics") - } - if err := res.MapScan(rows); err != nil { - return err - } - if err := res.Close(); err != nil { - return err - } - } - } - } - - if _, err := c.QueryContext(ctx, "call dolt_stats_stop();"); err != nil { - return err - } - - return nil -} diff --git a/go/performance/utils/benchmark_runner/tpcc.go b/go/performance/utils/benchmark_runner/tpcc.go index 4c7f01a2444..be265e6b568 100644 --- a/go/performance/utils/benchmark_runner/tpcc.go +++ b/go/performance/utils/benchmark_runner/tpcc.go @@ -20,9 +20,6 @@ import ( "os" "os/exec" "path/filepath" - "strings" - - "github.com/jmoiron/sqlx" ) type tpccTesterImpl struct { @@ -54,17 +51,6 @@ func (t *tpccTesterImpl) outputToResult(output []byte) (*Result, error) { return OutputToResult(output, t.serverConfig.GetServerType(), t.serverConfig.GetVersion(), t.test.GetName(), t.test.GetId(), t.suiteId, t.config.GetRuntimeOs(), t.config.GetRuntimeGoArch(), t.serverParams, t.test.GetParamsToSlice(), nil, false) } -func (t *tpccTesterImpl) collectStats(ctx context.Context) error { - if strings.Contains(t.serverConfig.GetServerExec(), "dolt") && !strings.Contains(t.serverConfig.GetServerExec(), "doltgres") { - db, err := sqlx.Open("mysql", fmt.Sprintf("root:@tcp(%s:%d)/sbt", t.serverConfig.GetHost(), t.serverConfig.GetPort())) - if err != nil { - return err - } - return collectStats(ctx, db) - } - return nil -} - func (t *tpccTesterImpl) prepare(ctx context.Context) error { args := t.test.GetPrepareArgs(t.serverConfig) cmd := exec.CommandContext(ctx, t.tpccCommand, args...) @@ -119,10 +105,6 @@ func (t *tpccTesterImpl) Test(ctx context.Context) (*Result, error) { return nil, err } - if err := t.collectStats(ctx); err != nil { - return nil, err - } - fmt.Println("Running test", t.test.GetName()) rs, err := t.run(ctx) diff --git a/go/store/prolly/tree/mutator.go b/go/store/prolly/tree/mutator.go index e6474e16cbf..a03d042a4a0 100644 --- a/go/store/prolly/tree/mutator.go +++ b/go/store/prolly/tree/mutator.go @@ -17,6 +17,7 @@ package tree import ( "bytes" "context" + "fmt" "github.com/dolthub/dolt/go/store/prolly/message" ) @@ -132,7 +133,7 @@ func ApplyMutations[K ~[]byte, O Ordering[K], S message.Serializer]( prev := newKey newKey, newValue = edits.NextMutation(ctx) if newKey != nil { - assertTrue(order.Compare(K(newKey), K(prev)) > 0, "expected sorted edits") + assertTrue(order.Compare(K(newKey), K(prev)) > 0, "expected sorted edits"+fmt.Sprintf("%v, %v", prev, newKey)) } } diff --git a/go/store/prolly/tree/stats.go b/go/store/prolly/tree/stats.go index 1573d01893d..9611f3b583d 100644 --- a/go/store/prolly/tree/stats.go +++ b/go/store/prolly/tree/stats.go @@ -141,6 +141,11 @@ func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m Static // GetHistogramLevel returns the highest internal level of the tree that has // more than |low| addresses. func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) { + if cnt, err := m.Count(); err != nil { + return nil, err + } else if cnt == 0 { + return nil, nil + } currentLevel := []Node{m.Root} level := m.Root.Level() for len(currentLevel) < low && level > 0 { diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index f92bc8ce1cb..9b3a50ea139 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -15,6 +15,8 @@ package val import ( + "log" + "strconv" "time" "github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors" @@ -77,7 +79,7 @@ func NewTupleBuilder(desc TupleDesc) *TupleBuilder { func (tb *TupleBuilder) Build(pool pool.BuffPool) (tup Tuple) { for i, typ := range tb.Desc.Types { if !typ.Nullable && tb.fields[i] == nil { - panic("cannot write NULL to non-NULL field") + log.Println("cannot write NULL to non-NULL field: " + strconv.Itoa(i) + " " + string(tb.fields[i])) } } return tb.BuildPermissive(pool) diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index bd55519ab35..188c1f98829 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -639,7 +639,7 @@ func (td TupleDesc) formatValue(enc Encoding, i int, value []byte) string { case StringAddrEnc: return hex.EncodeToString(value) case CommitAddrEnc: - return hex.EncodeToString(value) + return hash.New(value).String()[:5] case CellEnc: return hex.EncodeToString(value) case ExtendedEnc: diff --git a/integration-tests/bats/stats.bats b/integration-tests/bats/stats.bats index 7cc4c4bf9f2..03ac1eefbcf 100644 --- a/integration-tests/bats/stats.bats +++ b/integration-tests/bats/stats.bats @@ -22,6 +22,7 @@ SQL cd $TMPDIRS/repo2 dolt init + dolt sql -q "SET @@PERSIST.dolt_stats_job_interval = 100" dolt sql < data.py -import random -import os +## bats test_tags=no_lambda +#@test "stats: boostrap abort over 1mm rows" { + #cat < data.py +#import random +#import os -rows = 2*1000*1000+1 +#rows = 2*1000*1000+1 -def main(): - f = open("data.csv","w+") - f.write("id,hostname\n") +#def main(): + #f = open("data.csv","w+") + #f.write("id,hostname\n") - for i in range(rows): - hostname = random.getrandbits(100) - f.write(f"{i},{hostname}\n") - if i % (500*1000) == 0: - print("row :", i) - f.flush() + #for i in range(rows): + #hostname = random.getrandbits(100) + #f.write(f"{i},{hostname}\n") + #if i % (500*1000) == 0: + #print("row :", i) + #f.flush() - f.close() + #f.close() -if __name__ == "__main__": - main() -EOF +#if __name__ == "__main__": + #main() +#EOF - mkdir repo3 - cd repo3 - python3 ../data.py + #mkdir repo3 + #cd repo3 + #python3 ../data.py - dolt init - dolt sql -q "create table f (id int primary key, hostname int)" - dolt table import -u --continue f data.csv + #dolt init + #dolt sql -q "create table f (id int primary key, hostname int)" + #dolt table import -u --continue f data.csv - dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;" + #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 1;" - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false - [ "${lines[2]}" = "0" ] -} + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[[ "${lines[0]}" =~ "stats bootstrap aborted" ]] || false + #[ "${lines[2]}" = "0" ] +#} -@test "stats: stats delete index schema change" { - cd repo2 +#@test "stats: stats delete index schema change" { + #cd repo2 - dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" - dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" + #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" + #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" - dolt sql -q "insert into xy values (0,0), (1,1)" - dolt sql -q "analyze table xy" + #dolt sql -q "insert into xy values (0,0), (1,1)" + #dolt sql -q "analyze table xy" - # stats OK after analyze - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "2" ] + ## stats OK after analyze + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "2" ] - dolt sql -q "alter table xy drop index y" + #dolt sql -q "alter table xy drop index y" - # load after schema change should purge - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "0" ] + ## load after schema change should purge + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "0" ] - dolt sql -q "analyze table xy" - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "1" ] -} + #dolt sql -q "analyze table xy" + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "1" ] +#} -@test "stats: stats recreate table without index" { - cd repo2 +#@test "stats: stats recreate table without index" { + #cd repo2 - dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" - dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" + #dolt sql -q "set @@PERSIST.dolt_stats_bootstrap_enabled = 0;" + #dolt sql -q "set @@PERSIST.dolt_stats_auto_refresh_interval = 1;" - dolt sql -q "insert into xy values (0,0), (1,1)" - dolt sql -q "analyze table xy" + #dolt sql -q "insert into xy values (0,0), (1,1)" + #dolt sql -q "analyze table xy" - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "2" ] + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "2" ] - dolt sql -q "drop table xy" - dolt sql -q "create table xy (x int primary key, y int)" - dolt sql -q "insert into xy values (0,0), (1,1)" + #dolt sql -q "drop table xy" + #dolt sql -q "create table xy (x int primary key, y int)" + #dolt sql -q "insert into xy values (0,0), (1,1)" - # make sure no stats - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "0" ] + ## make sure no stats + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "0" ] - dolt sql -q "analyze table xy" + #dolt sql -q "analyze table xy" - run dolt sql -r csv -q "select count(*) from dolt_statistics" - [ "$status" -eq 0 ] - [ "${lines[1]}" = "1" ] + #run dolt sql -r csv -q "select count(*) from dolt_statistics" + #[ "$status" -eq 0 ] + #[ "${lines[1]}" = "1" ] - stop_sql_server -} + #stop_sql_server +#}