From cff431bcc13ce96174345f009522b48485348007 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 11 Feb 2025 10:09:33 -0800 Subject: [PATCH 1/3] Add DBLoader interface and implementations --- go/libraries/doltcore/dbfactory/aws.go | 13 +-- go/libraries/doltcore/dbfactory/factory.go | 41 +++++++- go/libraries/doltcore/dbfactory/file.go | 106 ++++++++++++++------- go/libraries/doltcore/dbfactory/grpc.go | 17 +--- go/libraries/doltcore/dbfactory/gs.go | 28 ++---- go/libraries/doltcore/dbfactory/mem.go | 16 ++-- go/libraries/doltcore/dbfactory/oci.go | 17 +--- go/libraries/doltcore/dbfactory/oss.go | 12 +-- go/libraries/doltcore/doltdb/doltdb.go | 18 +++- go/store/nbs/journal.go | 13 ++- go/store/nbs/store.go | 19 +++- 11 files changed, 185 insertions(+), 115 deletions(-) diff --git a/go/libraries/doltcore/dbfactory/aws.go b/go/libraries/doltcore/dbfactory/aws.go index eb6a5808243..c255439f8e4 100644 --- a/go/libraries/doltcore/dbfactory/aws.go +++ b/go/libraries/doltcore/dbfactory/aws.go @@ -30,9 +30,7 @@ import ( "github.com/dolthub/dolt/go/libraries/utils/awsrefreshcreds" "github.com/dolthub/dolt/go/store/chunks" - "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/nbs" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -117,19 +115,14 @@ func (fact AWSFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, } // CreateDB creates an AWS backed database -func (fact AWSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database +func (fact AWSFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { cs, err := fact.newChunkStore(ctx, nbf, urlObj, params) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(cs) - ns := tree.NewNodeStore(cs) - db = datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, nil + return ChunkStoreLoader{cs: cs}, nil } func (fact AWSFactory) newChunkStore(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (chunks.ChunkStore, error) { diff --git a/go/libraries/doltcore/dbfactory/factory.go b/go/libraries/doltcore/dbfactory/factory.go index dcb19f14386..af955622da3 100644 --- a/go/libraries/doltcore/dbfactory/factory.go +++ b/go/libraries/doltcore/dbfactory/factory.go @@ -17,6 +17,7 @@ package dbfactory import ( "context" "fmt" + "github.com/dolthub/dolt/go/store/chunks" "net/url" "strings" @@ -57,10 +58,32 @@ const ( defaultMemTableSize = 256 * 1024 * 1024 ) +type DBLoader interface { + IsAccessModeReadOnly() bool + LoadDB(ctx context.Context) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) +} + +type ChunkStoreLoader struct { + cs chunks.ChunkStore +} + +var _ DBLoader = ChunkStoreLoader{} + +func (l ChunkStoreLoader) IsAccessModeReadOnly() bool { + return false +} + +func (l ChunkStoreLoader) LoadDB(ctx context.Context) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { + vrw := types.NewValueStore(l.cs) + ns := tree.NewNodeStore(l.cs) + db := datas.NewTypesDatabase(vrw, ns) + return db, vrw, ns, nil +} + // DBFactory is an interface for creating concrete datas.Database instances from different backing stores type DBFactory interface { - // CreateDB returns the database located at the URL given and its associated data access interfaces - CreateDB(ctx context.Context, nbf *types.NomsBinFormat, u *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) + // GetDBLoader verifies the DB exists and acquires any necessary locks, returning an object that can be used to load the DB. + GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, u *url.URL, params map[string]interface{}) (DBLoader, error) // PrepareDB does any necessary setup work for a new database to be created at the URL given, e.g. to receive a push. // Not all factories support this operation. PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, u *url.URL, params map[string]interface{}) error @@ -83,10 +106,18 @@ var DBFactories = map[string]DBFactory{ // CreateDB creates a database based on the supplied urlStr, and creation params. The DBFactory used for creation is // determined by the scheme of the url. Naked urls will use https by default. func CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { + dbLoader, err := GetDBLoader(ctx, nbf, urlStr, params) + if err != nil { + return nil, nil, nil, err + } + return dbLoader.LoadDB(ctx) +} + +func GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, params map[string]interface{}) (DBLoader, error) { urlObj, err := earl.Parse(urlStr) if err != nil { - return nil, nil, nil, err + return nil, err } scheme := urlObj.Scheme @@ -95,10 +126,10 @@ func CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, para } if fact, ok := DBFactories[strings.ToLower(scheme)]; ok { - return fact.CreateDB(ctx, nbf, urlObj, params) + return fact.GetDBLoader(ctx, nbf, urlObj, params) } - return nil, nil, nil, fmt.Errorf("unknown url scheme: '%s'", urlObj.Scheme) + return nil, fmt.Errorf("unknown url scheme: '%s'", urlObj.Scheme) } // PrepareDB does the necessary work to create a database at the URL given, e.g. to ready a new remote for pushing. Not diff --git a/go/libraries/doltcore/dbfactory/file.go b/go/libraries/doltcore/dbfactory/file.go index 12e8ef23fe5..129db485af1 100644 --- a/go/libraries/doltcore/dbfactory/file.go +++ b/go/libraries/doltcore/dbfactory/file.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "github.com/dolthub/fslock" "net/url" "os" "path/filepath" @@ -62,12 +63,14 @@ type FileFactory struct { } type singletonDB struct { - ddb datas.Database - vrw types.ValueReadWriter - ns tree.NodeStore + lockFile *fslock.Lock + ddb datas.Database + vrw types.ValueReadWriter + ns tree.NodeStore } var singletonLock = new(sync.Mutex) +var singletonFileLocks = make(map[string]*fslock.Lock) var singletons = make(map[string]singletonDB) func CloseAllLocalDatabases() (err error) { @@ -114,46 +117,40 @@ func (fact FileFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, return nil } -// CreateDB creates a local filesys backed database -func (fact FileFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - singletonLock.Lock() - defer singletonLock.Unlock() - - if s, ok := singletons[urlObj.Path]; ok { - return s.ddb, s.vrw, s.ns, nil - } - - path, err := url.PathUnescape(urlObj.Path) - if err != nil { - return nil, nil, nil, err - } +type FileDBLoader struct { + urlPath string + path string + useJournal bool + nbf *types.NomsBinFormat + lockFile *fslock.Lock +} - path = filepath.FromSlash(path) - path = urlObj.Host + path +var _ DBLoader = FileDBLoader{} - err = validateDir(path) - if err != nil { - return nil, nil, nil, err - } +func (l FileDBLoader) IsAccessModeReadOnly() bool { + return l.lockFile == nil +} - var useJournal bool - if params != nil { - _, useJournal = params[ChunkJournalParam] +func (l FileDBLoader) LoadDB(ctx context.Context) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { + singletonLock.Lock() + defer singletonLock.Unlock() + if s, ok := singletons[l.urlPath]; ok { + return s.ddb, s.vrw, s.ns, nil } - var newGenSt *nbs.NomsBlockStore + var err error // TODO: Need to take both locks q := nbs.NewUnlimitedMemQuotaProvider() - if useJournal && chunkJournalFeatureFlag { - newGenSt, err = nbs.NewLocalJournalingStore(ctx, nbf.VersionString(), path, q) + if l.useJournal && chunkJournalFeatureFlag { + newGenSt, err = nbs.NewLocalJournalingStoreWithLock(ctx, l.lockFile, l.nbf.VersionString(), l.path, q) } else { - newGenSt, err = nbs.NewLocalStore(ctx, nbf.VersionString(), path, defaultMemTableSize, q) + newGenSt, err = nbs.NewLocalStoreWithLock(ctx, l.lockFile, l.nbf.VersionString(), l.path, defaultMemTableSize, q) } if err != nil { return nil, nil, nil, err } - oldgenPath := filepath.Join(path, "oldgen") + oldgenPath := filepath.Join(l.path, "oldgen") err = validateDir(oldgenPath) if err != nil { if !errors.Is(err, os.ErrNotExist) { @@ -171,7 +168,7 @@ func (fact FileFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, return nil, nil, nil, err } - ghostGen, err := nbs.NewGhostBlockStore(path) + ghostGen, err := nbs.NewGhostBlockStore(l.path) if err != nil { return nil, nil, nil, err } @@ -183,7 +180,7 @@ func (fact FileFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, ns := tree.NewNodeStore(st) ddb := datas.NewTypesDatabase(vrw, ns) - singletons[urlObj.Path] = singletonDB{ + singletons[l.urlPath] = singletonDB{ ddb: ddb, vrw: vrw, ns: ns, @@ -192,6 +189,51 @@ func (fact FileFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, return ddb, vrw, ns, nil } +// CreateDB creates a local filesys backed database +func (fact FileFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (dbLoader DBLoader, err error) { + singletonLock.Lock() + defer singletonLock.Unlock() + + path, err := url.PathUnescape(urlObj.Path) + if err != nil { + return nil, err + } + + path = filepath.FromSlash(path) + path = urlObj.Host + path + + err = validateDir(path) + if err != nil { + return nil, err + } + + var useJournal bool + if params != nil { + _, useJournal = params[ChunkJournalParam] + } + + var lock *fslock.Lock + + if s, ok := singletonFileLocks[urlObj.Path]; ok { + lock = s + } else { + lock, err = nbs.AcquireManifestLock(path) + if err != nil { + return nil, err + } + + singletonFileLocks[urlObj.Path] = lock + } + + return FileDBLoader{ + urlPath: urlObj.Path, + path: path, + useJournal: useJournal, + lockFile: lock, + nbf: nbf, + }, nil +} + func validateDir(path string) error { info, err := os.Stat(path) diff --git a/go/libraries/doltcore/dbfactory/grpc.go b/go/libraries/doltcore/dbfactory/grpc.go index 85204d2aab6..af90df0a6c5 100644 --- a/go/libraries/doltcore/dbfactory/grpc.go +++ b/go/libraries/doltcore/dbfactory/grpc.go @@ -27,8 +27,6 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/remotestorage" "github.com/dolthub/dolt/go/libraries/events" "github.com/dolthub/dolt/go/store/chunks" - "github.com/dolthub/dolt/go/store/datas" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -70,29 +68,24 @@ func NewDoltRemoteFactory(insecure bool) DoltRemoteFactory { // CreateDB creates a database backed by a remote server that implements the GRPC rpcs defined by // remoteapis.ChunkStoreServiceClient -func (fact DoltRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database +func (fact DoltRemoteFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { dpi, ok := params[GRPCDialProviderParam] if dpi == nil || !ok { - return nil, nil, nil, errors.New("DoltRemoteFactory.CreateDB must provide a GRPCDialProvider param through GRPCDialProviderParam") + return nil, errors.New("DoltRemoteFactory.GetDBLoader must provide a GRPCDialProvider param through GRPCDialProviderParam") } dp, ok := dpi.(GRPCDialProvider) if !ok { - return nil, nil, nil, errors.New("DoltRemoteFactory.CreateDB must provide a GRPCDialProvider param through GRPCDialProviderParam") + return nil, errors.New("DoltRemoteFactory.GetDBLoader must provide a GRPCDialProvider param through GRPCDialProviderParam") } cs, err := fact.newChunkStore(ctx, nbf, urlObj, params, dp) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(cs) - ns := tree.NewNodeStore(cs) - db = datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, err + return ChunkStoreLoader{cs: cs}, nil } // If |params[NoCachingParameter]| is set in |params| of the CreateDB call for diff --git a/go/libraries/doltcore/dbfactory/gs.go b/go/libraries/doltcore/dbfactory/gs.go index a4ccbbb2533..79a095f8936 100644 --- a/go/libraries/doltcore/dbfactory/gs.go +++ b/go/libraries/doltcore/dbfactory/gs.go @@ -22,9 +22,7 @@ import ( "cloud.google.com/go/storage" "github.com/dolthub/dolt/go/store/blobstore" - "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/nbs" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -38,12 +36,11 @@ func (fact GSFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, u } // CreateDB creates an GCS backed database -func (fact GSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database +func (fact GSFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { gcs, err := storage.NewClient(ctx) if err != nil { - return nil, nil, nil, err + return nil, err } bs := blobstore.NewGCSBlobstore(gcs, urlObj.Host, urlObj.Path) @@ -51,14 +48,10 @@ func (fact GSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, ur gcsStore, err := nbs.NewBSStore(ctx, nbf.VersionString(), bs, defaultMemTableSize, q) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(gcsStore) - ns := tree.NewNodeStore(gcsStore) - db = datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, nil + return ChunkStoreLoader{cs: gcsStore}, nil } // LocalBSFactory is a DBFactory implementation for creating a local filesystem blobstore backed databases for testing @@ -71,12 +64,11 @@ func (fact LocalBSFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinForm } // CreateDB creates a local filesystem blobstore backed database -func (fact LocalBSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database +func (fact LocalBSFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { absPath, err := filepath.Abs(filepath.Join(urlObj.Host, urlObj.Path)) if err != nil { - return nil, nil, nil, err + return nil, err } bs := blobstore.NewLocalBlobstore(absPath) @@ -84,12 +76,8 @@ func (fact LocalBSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinForma bsStore, err := nbs.NewBSStore(ctx, nbf.VersionString(), bs, defaultMemTableSize, q) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(bsStore) - ns := tree.NewNodeStore(bsStore) - db = datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, err + return ChunkStoreLoader{cs: bsStore}, err } diff --git a/go/libraries/doltcore/dbfactory/mem.go b/go/libraries/doltcore/dbfactory/mem.go index 49f632e63d9..e99bc43422a 100644 --- a/go/libraries/doltcore/dbfactory/mem.go +++ b/go/libraries/doltcore/dbfactory/mem.go @@ -38,17 +38,21 @@ func (fact MemFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, // CreateDB creates an in memory backed database func (fact MemFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database + dbLoader, err := fact.GetDBLoader(ctx, nbf, urlObj, params) + if err != nil { + return nil, nil, nil, err + } + return dbLoader.LoadDB(ctx) +} +// GetDBLoader creates an object that can be used to create a memory backed database +func (fact MemFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, u *url.URL, params map[string]interface{}) (DBLoader, error) { bs := blobstore.NewInMemoryBlobstore(uuid.New().String()) q := nbs.NewUnlimitedMemQuotaProvider() cs, err := nbs.NewBSStore(ctx, nbf.VersionString(), bs, defaultMemTableSize, q) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(cs) - ns := tree.NewNodeStore(cs) - db = datas.NewTypesDatabase(vrw, ns) - return db, vrw, ns, nil + return ChunkStoreLoader{cs: cs}, nil } diff --git a/go/libraries/doltcore/dbfactory/oci.go b/go/libraries/doltcore/dbfactory/oci.go index e6439c9efa8..e042520e304 100644 --- a/go/libraries/doltcore/dbfactory/oci.go +++ b/go/libraries/doltcore/dbfactory/oci.go @@ -22,9 +22,7 @@ import ( "github.com/oracle/oci-go-sdk/v65/objectstorage" "github.com/dolthub/dolt/go/store/blobstore" - "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/nbs" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -38,30 +36,25 @@ func (fact OCIFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, } // CreateDB creates an OCI backed database -func (fact OCIFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - var db datas.Database +func (fact OCIFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { provider := common.DefaultConfigProvider() client, err := objectstorage.NewObjectStorageClientWithConfigurationProvider(provider) if err != nil { - return nil, nil, nil, err + return nil, err } bs, err := blobstore.NewOCIBlobstore(ctx, provider, client, urlObj.Host, urlObj.Path) if err != nil { - return nil, nil, nil, err + return nil, err } q := nbs.NewUnlimitedMemQuotaProvider() ociStore, err := nbs.NewNoConjoinBSStore(ctx, nbf.VersionString(), bs, defaultMemTableSize, q) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(ociStore) - ns := tree.NewNodeStore(ociStore) - db = datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, nil + return ChunkStoreLoader{cs: ociStore}, nil } diff --git a/go/libraries/doltcore/dbfactory/oss.go b/go/libraries/doltcore/dbfactory/oss.go index 226df325c8a..e4f5510ba28 100644 --- a/go/libraries/doltcore/dbfactory/oss.go +++ b/go/libraries/doltcore/dbfactory/oss.go @@ -28,9 +28,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/dconfig" "github.com/dolthub/dolt/go/store/blobstore" "github.com/dolthub/dolt/go/store/chunks" - "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/nbs" - "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -67,17 +65,13 @@ func (fact OSSFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, } // CreateDB creates an OSS backed database -func (fact OSSFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { +func (fact OSSFactory) GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (DBLoader, error) { ossStore, err := fact.newChunkStore(ctx, nbf, urlObj, params) if err != nil { - return nil, nil, nil, err + return nil, err } - vrw := types.NewValueStore(ossStore) - ns := tree.NewNodeStore(ossStore) - db := datas.NewTypesDatabase(vrw, ns) - - return db, vrw, ns, nil + return ChunkStoreLoader{cs: ossStore}, nil } func (fact OSSFactory) newChunkStore(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (chunks.ChunkStore, error) { diff --git a/go/libraries/doltcore/doltdb/doltdb.go b/go/libraries/doltcore/doltdb/doltdb.go index 081657e8226..43a70a0b8d5 100644 --- a/go/libraries/doltcore/doltdb/doltdb.go +++ b/go/libraries/doltcore/doltdb/doltdb.go @@ -116,7 +116,19 @@ func LoadDoltDB(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, fs return LoadDoltDBWithParams(ctx, nbf, urlStr, fs, nil) } +func GetDBLoader(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, fs filesys.Filesys) (dbfactory.DBLoader, error) { + return GetDBLoaderWithParams(ctx, nbf, urlStr, fs, nil) +} + func LoadDoltDBWithParams(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, fs filesys.Filesys, params map[string]interface{}) (*DoltDB, error) { + dbLoader, err := GetDBLoaderWithParams(ctx, nbf, urlStr, fs, params) + if err != nil { + return nil, err + } + return LoadDB(ctx, dbLoader, urlStr) +} + +func GetDBLoaderWithParams(ctx context.Context, nbf *types.NomsBinFormat, urlStr string, fs filesys.Filesys, params map[string]interface{}) (dbfactory.DBLoader, error) { if urlStr == LocalDirDoltDB { exists, isDir := fs.Exists(dbfactory.DoltDataDir) if !exists { @@ -138,12 +150,16 @@ func LoadDoltDBWithParams(ctx context.Context, nbf *types.NomsBinFormat, urlStr params[dbfactory.ChunkJournalParam] = struct{}{} } + return dbfactory.GetDBLoader(ctx, nbf, urlStr, params) +} + +func LoadDB(ctx context.Context, dbLoader dbfactory.DBLoader, urlStr string) (*DoltDB, error) { // Pull the database name out of the URL string. For filesystem-based databases (e.g. in-memory or disk-based // filesystem implementations), we can determine the database name by looking at the filesystem path. This // won't work for other storage schemes though. name := findParentDirectory(urlStr, ".dolt") - db, vrw, ns, err := dbfactory.CreateDB(ctx, nbf, urlStr, params) + db, vrw, ns, err := dbLoader.LoadDB(ctx) if err != nil { return nil, err } diff --git a/go/store/nbs/journal.go b/go/store/nbs/journal.go index dc8deff8e19..11823188e80 100644 --- a/go/store/nbs/journal.go +++ b/go/store/nbs/journal.go @@ -519,17 +519,22 @@ func (c journalConjoiner) chooseConjoinees(upstream []tableSpec) (conjoinees, ke return } -// newJournalManifest makes a new file manifest. -func newJournalManifest(ctx context.Context, dir string) (m *journalManifest, err error) { - lock := fslock.New(filepath.Join(dir, lockFileName)) +func AcquireManifestLock(dir string) (lock *fslock.Lock, err error) { + lock = fslock.New(filepath.Join(dir, lockFileName)) // try to take the file lock. if we fail, make the manifest read-only. // if we succeed, hold the file lock until we close the journalManifest err = lock.LockWithTimeout(lockFileTimeout) if errors.Is(err, fslock.ErrTimeout) { lock, err = nil, nil // read only - } else if err != nil { + } + if err != nil { return nil, err } + return lock, nil +} + +// newJournalManifest makes a new file manifest. +func newJournalManifest(ctx context.Context, lock *fslock.Lock, dir string) (m *journalManifest, err error) { m = &journalManifest{dir: dir, lock: lock} var f *os.File diff --git a/go/store/nbs/store.go b/go/store/nbs/store.go index e49daa85956..ad6614fd49c 100644 --- a/go/store/nbs/store.go +++ b/go/store/nbs/store.go @@ -25,6 +25,7 @@ import ( "context" "errors" "fmt" + "github.com/dolthub/fslock" "io" "os" "path/filepath" @@ -565,10 +566,15 @@ func NewNoConjoinBSStore(ctx context.Context, nbfVerStr string, bs blobstore.Blo } func NewLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) { - return newLocalStore(ctx, nbfVerStr, dir, memTableSize, defaultMaxTables, q) + lock := fslock.New(filepath.Join(dir, lockFileName)) + return newLocalStore(ctx, lock, nbfVerStr, dir, memTableSize, defaultMaxTables, q) } -func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64, maxTables int, q MemoryQuotaProvider) (*NomsBlockStore, error) { +func NewLocalStoreWithLock(ctx context.Context, lockFile *fslock.Lock, nbfVerStr string, dir string, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) { + return newLocalStore(ctx, lockFile, nbfVerStr, dir, memTableSize, defaultMaxTables, q) +} + +func newLocalStore(ctx context.Context, lockFile *fslock.Lock, nbfVerStr string, dir string, memTableSize uint64, maxTables int, q MemoryQuotaProvider) (*NomsBlockStore, error) { cacheOnce.Do(makeGlobalCaches) if err := checkDir(dir); err != nil { return nil, err @@ -580,7 +586,7 @@ func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSi return nil, fmt.Errorf("cannot create NBS store for directory containing chunk journal: %s", dir) } - m, err := getFileManifest(ctx, dir, asyncFlush) + m, err := getFileManifest(ctx, lockFile, dir, asyncFlush) if err != nil { return nil, err } @@ -591,12 +597,17 @@ func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSi } func NewLocalJournalingStore(ctx context.Context, nbfVers, dir string, q MemoryQuotaProvider) (*NomsBlockStore, error) { + lock := fslock.New(filepath.Join(dir, lockFileName)) + return NewLocalJournalingStoreWithLock(ctx, lock, nbfVers, dir, q) +} + +func NewLocalJournalingStoreWithLock(ctx context.Context, lock *fslock.Lock, nbfVers, dir string, q MemoryQuotaProvider) (*NomsBlockStore, error) { cacheOnce.Do(makeGlobalCaches) if err := checkDir(dir); err != nil { return nil, err } - m, err := newJournalManifest(ctx, dir) + m, err := newJournalManifest(ctx, lock, dir) if err != nil { return nil, err } From f50692751a84c5bb8d43a424133fb0a573329e55 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 11 Feb 2025 10:10:38 -0800 Subject: [PATCH 2/3] Use DBLoader when loading Dolt environment. --- go/cmd/dolt/dolt.go | 13 +++++++------ go/libraries/doltcore/env/environment.go | 15 ++++++++++++--- go/libraries/doltcore/env/multi_repo_env.go | 9 +++++++-- go/store/nbs/file_manifest.go | 3 +-- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/go/cmd/dolt/dolt.go b/go/cmd/dolt/dolt.go index 44dde399ec4..a8d7a716f16 100644 --- a/go/cmd/dolt/dolt.go +++ b/go/cmd/dolt/dolt.go @@ -417,7 +417,7 @@ func runMain() int { args = nil // This is the dEnv passed to sub-commands, and is used to create the multi-repo environment. - dEnv := env.LoadWithoutDB(ctx, env.GetCurrentUserHomeDir, cfg.dataDirFS, doltdb.LocalDirDoltDB, doltversion.Version) + dEnv := env.Load(ctx, env.GetCurrentUserHomeDir, cfg.dataDirFS, doltdb.LocalDirDoltDB, doltversion.Version) if dEnv.CfgLoadErr != nil { cli.PrintErrln(color.RedString("Failed to load the global config. %v", dEnv.CfgLoadErr)) @@ -678,11 +678,8 @@ If you're interested in running this command against a remote host, hit us up on } var lookForServer bool - if targetEnv.DoltDB(ctx) != nil && targetEnv.IsAccessModeReadOnly(ctx) { - // If the loaded target environment has a doltDB and we do not - // have access to it, we look for a server. - lookForServer = true - } else if targetEnv.DoltDB(ctx) == nil { + + if !targetEnv.Exists() { // If the loaded environment itself does not have a doltDB, we // may want to look for a server. We do so if all of the // repositories in our MultiEnv are ReadOnly. This includes the @@ -695,6 +692,10 @@ If you're interested in running this command against a remote host, hit us up on return !allReposAreReadOnly, nil }) lookForServer = allReposAreReadOnly + } else if targetEnv.IsAccessModeReadOnly(ctx) { + // If the loaded target environment has a doltDB and we do not + // have access to it, we look for a server. + lookForServer = true } if lookForServer { localCreds, err := sqlserver.FindAndLoadLocalCreds(targetEnv.FS) diff --git a/go/libraries/doltcore/env/environment.go b/go/libraries/doltcore/env/environment.go index 5cda6857d69..e28a8673dca 100644 --- a/go/libraries/doltcore/env/environment.go +++ b/go/libraries/doltcore/env/environment.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "github.com/dolthub/dolt/go/store/chunks" "os" "path/filepath" "strings" @@ -37,7 +38,6 @@ import ( "github.com/dolthub/dolt/go/libraries/utils/concurrentmap" "github.com/dolthub/dolt/go/libraries/utils/config" "github.com/dolthub/dolt/go/libraries/utils/filesys" - "github.com/dolthub/dolt/go/store/chunks" "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/types" @@ -88,6 +88,7 @@ type DoltEnv struct { RepoState *RepoState RSLoadErr error + dbLoader dbfactory.DBLoader loadDBOnce sync.Once doltDB *doltdb.DoltDB DBLoadError error @@ -203,13 +204,14 @@ func LoadWithoutDB(_ context.Context, hdp HomeDirProvider, fs filesys.Filesys, u // Load loads the DoltEnv for the .dolt directory determined by resolving the specified urlStr with the specified Filesys. func Load(ctx context.Context, hdp HomeDirProvider, fs filesys.Filesys, urlStr string, version string) *DoltEnv { dEnv := LoadWithoutDB(ctx, hdp, fs, urlStr, version) - LoadDoltDB(ctx, dEnv.FS, dEnv.urlStr, dEnv) + dEnv.dbLoader, dEnv.DBLoadError = doltdb.GetDBLoader(ctx, types.Format_Default, urlStr, fs) + return dEnv } func LoadDoltDB(ctx context.Context, fs filesys.Filesys, urlStr string, dEnv *DoltEnv) { dEnv.loadDBOnce.Do(func() { - ddb, dbLoadErr := doltdb.LoadDoltDB(ctx, types.Format_Default, urlStr, fs) + ddb, dbLoadErr := doltdb.LoadDB(ctx, dEnv.dbLoader, urlStr) dEnv.doltDB = ddb dEnv.DBLoadError = dbLoadErr dEnv.urlStr = urlStr @@ -1269,6 +1271,13 @@ func (dEnv *DoltEnv) BulkDbEaFactory(ctx context.Context) editor.DbEaFactory { return editor.NewBulkImportTEAFactory(dEnv.DoltDB(ctx).ValueReadWriter(), tmpDir) } +func (dEnv *DoltEnv) Exists() bool { + return dEnv.dbLoader != nil || dEnv.doltDB != nil +} + func (dEnv *DoltEnv) IsAccessModeReadOnly(ctx context.Context) bool { + if dEnv.dbLoader != nil { + return dEnv.dbLoader.IsAccessModeReadOnly() + } return dEnv.DoltDB(ctx).AccessMode() == chunks.ExclusiveAccessMode_ReadOnly } diff --git a/go/libraries/doltcore/env/multi_repo_env.go b/go/libraries/doltcore/env/multi_repo_env.go index 4110db26370..671dabd02c3 100644 --- a/go/libraries/doltcore/env/multi_repo_env.go +++ b/go/libraries/doltcore/env/multi_repo_env.go @@ -187,7 +187,7 @@ func MultiEnvForDirectory( version = dEnv.Version } - newEnv := LoadWithoutDB(ctx, GetCurrentUserHomeDir, newFs, doltdb.LocalDirDoltDB, version) + newEnv := Load(ctx, GetCurrentUserHomeDir, newFs, doltdb.LocalDirDoltDB, version) if newEnv.Valid() { envSet[dbfactory.DirToDBName(dir)] = newEnv } else { @@ -246,7 +246,12 @@ func (mrEnv *MultiRepoEnv) ReloadDBs( } } } - mrEnv.envs = enforceSingleFormat(ctx, mrEnv.envs) + // We can skip the format check if there's only one environment. + // This is vital because it allows us to defer loading the database if there's only one. + // TODO: Can we determine the format without loading the database? + if len(mrEnv.envs) > 1 { + mrEnv.envs = enforceSingleFormat(ctx, mrEnv.envs) + } } func (mrEnv *MultiRepoEnv) FileSystem() filesys.Filesys { diff --git a/go/store/nbs/file_manifest.go b/go/store/nbs/file_manifest.go index 970e7dcdfe0..9e301977474 100644 --- a/go/store/nbs/file_manifest.go +++ b/go/store/nbs/file_manifest.go @@ -95,8 +95,7 @@ func MaybeMigrateFileManifest(ctx context.Context, dir string) (bool, error) { } // getFileManifest makes a new file manifest. -func getFileManifest(ctx context.Context, dir string, mode updateMode) (m manifest, err error) { - lock := fslock.New(filepath.Join(dir, lockFileName)) +func getFileManifest(ctx context.Context, lock *fslock.Lock, dir string, mode updateMode) (m manifest, err error) { m = fileManifest{dir: dir, mode: mode, lock: lock} var f *os.File From 2af08366ca9bcc08d7f9c0cd233d77b2b698fc99 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 12 Feb 2025 15:19:13 -0800 Subject: [PATCH 3/3] Add tests that DB isn't loaded unnecessarily. --- go/libraries/doltcore/dbfactory/file.go | 9 ++++++++- .../bats/helper/query-server-common.bash | 5 +++++ integration-tests/bats/sql-server.bats | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/go/libraries/doltcore/dbfactory/file.go b/go/libraries/doltcore/dbfactory/file.go index 129db485af1..d568983b6d1 100644 --- a/go/libraries/doltcore/dbfactory/file.go +++ b/go/libraries/doltcore/dbfactory/file.go @@ -41,6 +41,8 @@ func init() { var chunkJournalFeatureFlag = true +var _, forbidDBLoadForTest = os.LookupEnv("DOLT_FORBID_DB_LOAD_FOR_TEST") + const ( // DoltDir defines the directory used to hold the dolt repo data within the filesys DoltDir = ".dolt" @@ -137,8 +139,13 @@ func (l FileDBLoader) LoadDB(ctx context.Context) (datas.Database, types.ValueRe if s, ok := singletons[l.urlPath]; ok { return s.ddb, s.vrw, s.ns, nil } + if forbidDBLoadForTest { + // If we simply return an error, Dolt will log it and continue with a nil DB. + // Since this can only be hit in testing, it's okay to panic here. + panic("attempted to load DB, but DOLT_FORBID_DB_LOAD_FOR_TEST environment variable was set") + } var newGenSt *nbs.NomsBlockStore - var err error // TODO: Need to take both locks + var err error q := nbs.NewUnlimitedMemQuotaProvider() if l.useJournal && chunkJournalFeatureFlag { newGenSt, err = nbs.NewLocalJournalingStoreWithLock(ctx, l.lockFile, l.nbf.VersionString(), l.path, q) diff --git a/integration-tests/bats/helper/query-server-common.bash b/integration-tests/bats/helper/query-server-common.bash index 8683b2ee4db..f46cf24d2a8 100644 --- a/integration-tests/bats/helper/query-server-common.bash +++ b/integration-tests/bats/helper/query-server-common.bash @@ -53,6 +53,7 @@ start_sql_server() { fi echo db:$DEFAULT_DB logFile:$logFile PORT:$PORT CWD:$PWD SERVER_PID=$! + export DOLT_FORBID_DB_LOAD_FOR_TEST=1 wait_for_connection $PORT 8500 } @@ -74,6 +75,7 @@ start_sql_server_with_args_no_port() { dolt sql-server "$@" --socket "dolt.$PORT.sock" & fi SERVER_PID=$! + export DOLT_FORBID_DB_LOAD_FOR_TEST=1 wait_for_connection $PORT 8500 } @@ -98,6 +100,7 @@ behavior: dolt sql-server --config .cliconfig.yaml --socket "dolt.$PORT.sock" & fi SERVER_PID=$! + export DOLT_FORBID_DB_LOAD_FOR_TEST=1 wait_for_connection $PORT 8500 } @@ -124,6 +127,7 @@ behavior: dolt sql-server --config .cliconfig.yaml --socket "dolt.$PORT.sock" & fi SERVER_PID=$! + export DOLT_FORBID_DB_LOAD_FOR_TEST=1 wait_for_connection $PORT 8500 } @@ -159,6 +163,7 @@ stop_sql_server() { fi; fi SERVER_PID= + unset DOLT_FORBID_DB_LOAD_FOR_TEST } definePORT() { diff --git a/integration-tests/bats/sql-server.bats b/integration-tests/bats/sql-server.bats index 01b63bc78dc..1d164a4cbc7 100644 --- a/integration-tests/bats/sql-server.bats +++ b/integration-tests/bats/sql-server.bats @@ -2077,3 +2077,22 @@ EOF [[ "$output" =~ "br3 | true" ]] || false [[ "$output" =~ "main | false" ]] || false } + +@test "sql-server: confirm DOLT_FORBID_DB_LOAD_FOR_TEST blocks attempts to load the DB" { + export DOLT_FORBID_DB_LOAD_FOR_TEST=1 + run dolt sql -q "select 1" + [ $status -ne 0 ] + [[ $output =~ "attempted to load DB, but DOLT_FORBID_DB_LOAD_FOR_TEST environment variable was set" ]] || false +} + +@test "sql-server: confirm DOLT_FORBID_DB_LOAD_FOR_TEST works" { + cd repo1 + start_sql_server + run echo $DOLT_FORBID_DB_LOAD_FOR_TEST + [ $status -eq 0 ] + [[ $output =~ "1" ]] || false + + # Although $DOLT_FORBID_DB_LOAD_FOR_TEST is set, this succeeds because the DB is not loaded. + run dolt sql -q "select 1" + [ $status -eq 0 ] +} \ No newline at end of file