diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 3d612f5549e30..325f9a0a2e8c6 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -4,44 +4,17 @@ package git import ( - "bufio" "errors" "code.gitea.io/gitea/modules/git/catfile" ) -// ReadBatchLine reads the header line from cat-file --batch while preserving the traditional return signature. -func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { - sha, typ, size, err = catfile.ReadBatchLine(rd) - return sha, typ, size, convertCatfileError(err, sha) -} - -// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTagObjectID(rd, size) -} - -// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTreeID(rd, size) -} - -// BinToHex converts a binary hash into a hex encoded one. -func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { - return catfile.BinToHex(objectFormat, sha, out) -} - // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream. -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { +func ParseCatFileTreeLine(objectFormat ObjectFormat, rd catfile.ReadCloseDiscarder, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf) return mode, fname, sha, n, convertCatfileError(err, nil) } -// DiscardFull discards the requested number of bytes from the buffered reader. -func DiscardFull(rd *bufio.Reader, discard int64) error { - return catfile.DiscardFull(rd, discard) -} - func convertCatfileError(err error, defaultID []byte) error { if err == nil { return nil diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 88e2be792bce9..abaea95335b96 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -6,10 +6,10 @@ package git import ( - "bufio" "bytes" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -26,39 +26,30 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + objectInfo, contentReader, err := b.repo.objectPool.Object(b.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ID: b.ID.String()} + } return nil, err } - rd := batch.Reader() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - cancel() - return nil, err - } - _, _, size, err := ReadBatchLine(rd) - if err != nil { - cancel() - return nil, err - } b.gotSize = true - b.size = size + b.size = objectInfo.Size - if size < 4096 { - bs, err := io.ReadAll(io.LimitReader(rd, size)) - defer cancel() + if b.size < 4096 { + defer contentReader.Close() + bs, err := io.ReadAll(io.LimitReader(contentReader, b.size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) return io.NopCloser(bytes.NewReader(bs)), err } return &blobReader{ - rd: rd, - n: size, - cancel: cancel, + rd: contentReader, + n: b.size, }, nil } @@ -68,32 +59,20 @@ func (b *Blob) Size() int64 { return b.size } - batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - defer cancel() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - _, _, b.size, err = ReadBatchLine(batch.Reader()) + objInfo, err := b.repo.objectPool.ObjectInfo(b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } + b.size = objInfo.Size b.gotSize = true - return b.size } type blobReader struct { - rd *bufio.Reader - n int64 - cancel func() + rd catfile.ReadCloseDiscarder + n int64 } func (b *blobReader) Read(p []byte) (n int, err error) { @@ -114,13 +93,8 @@ func (b *blobReader) Close() error { return nil } - defer b.cancel() - - if err := DiscardFull(b.rd, b.n+1); err != nil { - return err - } - + err := catfile.DiscardFull(b.rd, b.n+1) + b.rd.Close() b.rd = nil - - return nil + return err } diff --git a/modules/git/catfile/batch.go b/modules/git/catfile/batch.go index 1facb8946eb06..fd99f3e7af742 100644 --- a/modules/git/catfile/batch.go +++ b/modules/git/catfile/batch.go @@ -21,63 +21,6 @@ type WriteCloserError interface { CloseWithError(err error) error } -type Batch interface { - Writer() WriteCloserError - Reader() *bufio.Reader - Close() -} - -// batch represents an active `git cat-file --batch` or `--batch-check` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batch struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatch(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) - return &batch, nil -} - -// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatchCheck(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batch - check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func (b *batch) Writer() WriteCloserError { - return b.writer -} - -func (b *batch) Reader() *bufio.Reader { - return b.reader -} - -// Close stops the underlying git cat-file process and releases held resources. -func (b *batch) Close() { - if b == nil || b.cancel == nil { - return - } - b.cancel() - b.reader = nil - b.writer = nil - b.cancel = nil -} - // EnsureValidGitRepository runs `git rev-parse` in the repository path to make sure // the directory is a valid git repository. This avoids git cat-file hanging indefinitely // when invoked in invalid paths. diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go new file mode 100644 index 0000000000000..651e4d15b856c --- /dev/null +++ b/modules/git/catfile/object_pool.go @@ -0,0 +1,31 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "io" +) + +type ObjectInfo struct { + ID string + Type string + Size int64 +} + +type Discarder interface { + Discard(n int) (int, error) +} + +type ReadCloseDiscarder interface { + io.ReadCloser + Discarder + ReadBytes(delim byte) ([]byte, error) + ReadSlice(delim byte) (line []byte, err error) +} + +type ObjectPool interface { + ObjectInfo(refName string) (*ObjectInfo, error) + Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) + Close() +} diff --git a/modules/git/catfile/object_pool_batch.go b/modules/git/catfile/object_pool_batch.go new file mode 100644 index 0000000000000..b7955fc487eea --- /dev/null +++ b/modules/git/catfile/object_pool_batch.go @@ -0,0 +1,181 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" + + "code.gitea.io/gitea/modules/log" +) + +// batch represents an active `git cat-file --batch` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batch struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError + inUse bool +} + +func (b *batch) Close() { + if b.cancel != nil { + b.cancel() + b.cancel = nil + } + if b.writer != nil { + _ = b.writer.Close() + b.writer = nil + } +} + +type batchObjectPool struct { + ctx context.Context + repoPath string + batches []*batch + batchChecks []*batch +} + +// NewBatchObjectPool creates a new ObjectPool that uses git cat-file --batch and --batch-check +// to read objects from the repository at repoPath. +func NewBatchObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { + return &batchObjectPool{ + ctx: ctx, + repoPath: repoPath, + }, nil +} + +func (b *batchObjectPool) getBatch() (*batch, error) { + for _, batch := range b.batches { + if !batch.inUse { + batch.inUse = true + return batch, nil + } + } + if len(b.batches) >= 1 { + log.Warn("Opening more than one cat file batch in the same goroutine for: %s", b.repoPath) + } + newBatch, err := b.newBatch() + if err != nil { + return nil, err + } + newBatch.inUse = true + b.batches = append(b.batches, newBatch) + return newBatch, nil +} + +// newBatch creates a new cat-file --batch process for the provided repository path. +// The returned Batch must be closed when objectPool closed. +func (b *batchObjectPool) newBatch() (*batch, error) { + if err := EnsureValidGitRepository(b.ctx, b.repoPath); err != nil { + return nil, err + } + var batch batch + batch.writer, batch.reader, batch.cancel = catFileBatch(b.ctx, b.repoPath) + return &batch, nil +} + +func (b *batchObjectPool) getBatchCheck() (*batch, error) { + for _, batch := range b.batchChecks { + if !batch.inUse { + batch.inUse = true + return batch, nil + } + } + if len(b.batchChecks) >= 1 { + log.Warn("Opening more than one cat file batch-check in the same goroutine for: %s", b.repoPath) + } + newBatch, err := b.newBatchCheck() + if err != nil { + return nil, err + } + newBatch.inUse = true + b.batchChecks = append(b.batchChecks, newBatch) + return newBatch, nil +} + +// newBatchCheck creates a new cat-file --batch-check process for the provided repository path. +// The returned Batch must be closed when objectPool closed. +func (b *batchObjectPool) newBatchCheck() (*batch, error) { + if err := EnsureValidGitRepository(b.ctx, b.repoPath); err != nil { + return nil, err + } + + var check batch + check.writer, check.reader, check.cancel = catFileBatchCheck(b.ctx, b.repoPath) + return &check, nil +} + +func releaseBatchCheck(batch *batch) { + if batch != nil { + batch.inUse = false + } +} + +func (b *batchObjectPool) ObjectInfo(refName string) (*ObjectInfo, error) { + batch, err := b.getBatchCheck() + if err != nil { + return nil, err + } + defer releaseBatchCheck(batch) + + _, err = batch.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var objInfo ObjectInfo + var oid []byte + oid, objInfo.Type, objInfo.Size, err = ReadBatchLine(batch.reader) + if err != nil { + return nil, err + } + objInfo.ID = string(oid) + return &objInfo, nil +} + +type readCloser struct { + *bufio.Reader + batch *batch +} + +func (rc *readCloser) Close() error { + rc.batch.inUse = false + return nil +} + +func (b *batchObjectPool) Object(refName string) (*ObjectInfo, ReadCloseDiscarder, error) { + batch, err := b.getBatch() + if err != nil { + return nil, nil, err + } + + _, err = batch.writer.Write([]byte(refName + "\n")) + if err != nil { + batch.inUse = false + return nil, nil, err + } + + var obj ObjectInfo + var oid []byte + oid, obj.Type, obj.Size, err = ReadBatchLine(batch.reader) + if err != nil { + batch.inUse = false + return nil, nil, err + } + obj.ID = string(oid) + + return &obj, &readCloser{Reader: batch.reader, batch: batch}, nil +} + +func (b *batchObjectPool) Close() { + for _, batch := range b.batches { + batch.Close() + } + b.batches = nil + for _, batchCheck := range b.batchChecks { + batchCheck.Close() + } + b.batchChecks = nil +} diff --git a/modules/git/catfile/reader.go b/modules/git/catfile/reader.go index 1785cc4cc0342..b09af97547441 100644 --- a/modules/git/catfile/reader.go +++ b/modules/git/catfile/reader.go @@ -69,7 +69,7 @@ func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err er } // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { +func ReadTagObjectID(rd ReadCloseDiscarder, size int64) (string, error) { var id string var n int64 headerLoop: @@ -94,7 +94,7 @@ headerLoop: } // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the commit content. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { +func ReadTreeID(rd ReadCloseDiscarder, size int64) (string, error) { var id string var n int64 headerLoop: @@ -136,7 +136,7 @@ func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream and avoids allocations // where possible. Each line is composed of: // SP NUL -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { +func ParseCatFileTreeLine(objectFormat ObjectFormat, rd ReadCloseDiscarder, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { var readBytes []byte readBytes, err = rd.ReadSlice('\x00') @@ -192,7 +192,7 @@ func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, } // DiscardFull discards the requested amount of bytes from the buffered reader regardless of its internal limit. -func DiscardFull(rd *bufio.Reader, discard int64) error { +func DiscardFull(rd ReadCloseDiscarder, discard int64) error { if discard > math.MaxInt32 { n, err := rd.Discard(math.MaxInt32) discard -= int64(n) diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index da291ae8481d7..e7cd0dee4939b 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" @@ -20,41 +21,9 @@ import ( // GetLanguageStats calculates language stats for git repository at specified commit func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { - // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. - // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - writeID := func(id string) error { - _, err := batch.Writer().Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { - return nil, err - } - batchReader := batch.Reader() - shaBytes, typ, size, err := git.ReadBatchLine(batchReader) - if typ != "commit" { - log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, git.ErrNotExist{ID: commitID} - } - - sha, err := git.NewIDFromString(string(shaBytes)) + commit, err := repo.GetCommit(commitID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, git.ErrNotExist{ID: commitID} - } - - commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) - if err != nil { - log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) - return nil, err - } - if _, err = batchReader.Discard(1); err != nil { return nil, err } @@ -143,30 +112,30 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, } // If content can not be read or file is too big just do detection by filename - if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { - return nil, err - } - _, _, size, err := git.ReadBatchLine(batchReader) + objectInfo, contentReader, err := repo.ObjectPool().Object(f.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: f.ID.String()} + } log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } + defer contentReader.Close() - sizeToRead := size + sizeToRead := objectInfo.Size discard := int64(1) - if size > fileSizeLimit { + if objectInfo.Size > fileSizeLimit { sizeToRead = fileSizeLimit - discard = size - fileSizeLimit + 1 + discard = objectInfo.Size - fileSizeLimit + 1 } - _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) + _, err = contentBuf.ReadFrom(io.LimitReader(contentReader, sizeToRead)) if err != nil { return nil, err } content = contentBuf.Bytes() - if err := git.DiscardFull(batchReader, discard); err != nil { + if err := catfile.DiscardFull(contentReader, discard); err != nil { return nil, err } } diff --git a/modules/git/parse_treeentry.go b/modules/git/parse_treeentry.go index e14d9f17b5dcd..b7bb8ac2f09ad 100644 --- a/modules/git/parse_treeentry.go +++ b/modules/git/parse_treeentry.go @@ -4,11 +4,11 @@ package git import ( - "bufio" "bytes" "fmt" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -47,7 +47,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { return entries, nil } -func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) { +func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd catfile.ReadCloseDiscarder, sz int64) ([]*TreeEntry, error) { fnameBuf := make([]byte, 4096) modeBuf := make([]byte, 40) shaBuf := make([]byte, objectFormat.FullLength()) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 6f1a860c1dba7..a976a14ad4800 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -14,6 +14,7 @@ import ( "sync" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -47,15 +48,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - batchStdinWriter := batch.Writer() - batchReader := batch.Reader() - // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -67,59 +59,50 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() - - // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) - if err != nil { - return nil, err - } + commitID := scan.Text() var curCommit *git.Commit curPath := "" commitReadingLoop: for { - _, typ, size, err := git.ReadBatchLine(batchReader) + objectInfo, contentReader, err := repo.ObjectPool().Object(commitID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: commitID} + } return nil, err } - switch typ { + switch objectInfo.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, size) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte(id + "\n")) + id, err := catfile.ReadTagObjectID(contentReader, objectInfo.Size) if err != nil { + contentReader.Close() return nil, err } - continue + commitID = id case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(contentReader, objectInfo.Size)) if err != nil { + contentReader.Close() return nil, err } - if _, err := batchReader.Discard(1); err != nil { + if _, err := contentReader.Discard(1); err != nil { + contentReader.Close() return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { - return nil, err - } + commitID = curCommit.Tree.ID.String() curPath = "" case "tree": var n int64 - for n < size { - mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) + for n < objectInfo.Size { + mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), contentReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { + contentReader.Close() return nil, err } n += int64(count) @@ -134,34 +117,31 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result } else if string(mode) == git.EntryModeTree.String() { hexObjectID := make([]byte, objectID.Type().FullLength()) - git.BinToHex(objectID.Type(), binObjectID, hexObjectID) + catfile.BinToHex(objectID.Type(), binObjectID, hexObjectID) trees = append(trees, hexObjectID) paths = append(paths, curPath+string(fname)+"/") } } - if _, err := batchReader.Discard(1); err != nil { + if _, err := contentReader.Discard(1); err != nil { + contentReader.Close() return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) - if err != nil { - return nil, err - } + commitID = string(trees[len(trees)-1]) curPath = paths[len(paths)-1] trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] } else { + contentReader.Close() break commitReadingLoop } default: - if err := git.DiscardFull(batchReader, size+1); err != nil { + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { + contentReader.Close() return nil, err } } + contentReader.Close() } } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 97a43b90fd3a2..567a0c1d5cdfc 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -11,7 +11,6 @@ import ( "path/filepath" "code.gitea.io/gitea/modules/git/catfile" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/util" ) @@ -23,11 +22,7 @@ type Repository struct { tagCache *ObjectCache[*Tag] - batchInUse bool - batch catfile.Batch - - checkInUse bool - check catfile.Batch + objectPool catfile.ObjectPool Ctx context.Context LastCommitCache *LastCommitCache @@ -49,76 +44,29 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { return nil, util.NewNotExistErrorf("no such file or directory") } - return &Repository{ - Path: repoPath, - tagCache: newObjectCache[*Tag](), - Ctx: ctx, - }, nil -} - -// CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func(), error) { - if repo.batch == nil { - var err error - repo.batch, err = catfile.NewBatch(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - } - - if !repo.batchInUse { - repo.batchInUse = true - return repo.batch, func() { - repo.batchInUse = false - }, nil - } - - log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := catfile.NewBatch(ctx, repo.Path) + objectPool, err := catfile.NewBatchObjectPool(ctx, repoPath) if err != nil { - return nil, nil, err - } - return tempBatch, tempBatch.Close, nil -} - -// CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, func(), error) { - if repo.check == nil { - var err error - repo.check, err = catfile.NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, err - } + return nil, err } - if !repo.checkInUse { - repo.checkInUse = true - return repo.check, func() { - repo.checkInUse = false - }, nil - } + return &Repository{ + Path: repoPath, + tagCache: newObjectCache[*Tag](), + objectPool: objectPool, + Ctx: ctx, + }, nil +} - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := catfile.NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - return tempBatchCheck, tempBatchCheck.Close, nil +func (repo *Repository) ObjectPool() catfile.ObjectPool { + return repo.objectPool } func (repo *Repository) Close() error { if repo == nil { return nil } - if repo.batch != nil { - repo.batch.Close() - repo.batch = nil - repo.batchInUse = false - } - if repo.check != nil { - repo.check.Close() - repo.check = nil - repo.checkInUse = false + if repo.objectPool != nil { + repo.objectPool.Close() } repo.LastCommitCache = nil repo.tagCache = nil diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 09873fb2c626d..e5716588d9562 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -8,7 +8,6 @@ package git import ( "bufio" - "bytes" "context" "io" "strings" @@ -23,19 +22,12 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfo, err := repo.objectPool.ObjectInfo(name) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Debug("Error writing to ObjectInfo %v", err) return false } - defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - sha, _, _, err := ReadBatchLine(batch.Reader()) - return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) + return strings.HasPrefix(objInfo.ID, strings.TrimSpace(name)) } // IsReferenceExist returns true if given reference exists in the repository. @@ -44,18 +36,7 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - _, _, _, err = ReadBatchLine(batch.Reader()) + _, err := repo.objectPool.ObjectInfo(name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index a3d728eb6d05b..0ab0b4a036277 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -37,21 +37,14 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) - if err != nil { - return "", err - } - defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + objInfo, err := repo.objectPool.ObjectInfo(name) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return "", ErrNotExist{name, ""} + } return "", err } - shaBs, _, _, err := ReadBatchLine(batch.Reader()) - if IsErrNotExist(err) { - return "", ErrNotExist{name, ""} - } - - return string(shaBs), nil + return objInfo.ID, nil } // IsCommitExist returns true if given commit exists in current repository. @@ -68,70 +61,60 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - _, _ = batch.Writer().Write([]byte(id.String() + "\n")) - - return repo.getCommitFromBatchReader(batch, id) -} - -func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectID) (*Commit, error) { - rd := batch.Reader() - _, typ, size, err := ReadBatchLine(rd) + objectInfo, contentReader, err := repo.objectPool.Object(id.String()) if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: id.String()} } return nil, err } - switch typ { + switch objectInfo.Type { case "missing": + contentReader.Close() return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { + contentReader.Close() return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { + contentReader.Close() return nil, err } + contentReader.Close() + tag, err := parseTagData(id.Type(), data) if err != nil { return nil, err } - if _, err := batch.Writer().Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommit(tag.Object) if err != nil { return nil, err } return commit, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + defer contentReader.Close() + commit, err := CommitFromReader(repo, id, io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { return nil, err } return commit, nil default: - log.Debug("Unknown typ: %s", typ) - if err := DiscardFull(rd, size+1); err != nil { + defer contentReader.Close() + log.Debug("Unknown typ: %s", objectInfo.Type) + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -153,22 +136,12 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfo, err := repo.objectPool.ObjectInfo(commitID) if err != nil { - return nil, err - } - defer cancel() - _, err = batch.Writer().Write([]byte(commitID + "\n")) - if err != nil { - return nil, err - } - sha, _, _, err := ReadBatchLine(batch.Reader()) - if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{commitID, ""} } return nil, err } - - return MustIDFromString(string(sha)), nil + return MustIDFromString(objInfo.ID), nil } diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 88d9edcbd88b5..114f2854a7015 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -10,6 +10,7 @@ import ( "errors" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -24,23 +25,14 @@ func (repo *Repository) IsTagExist(name string) bool { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfo, err := repo.objectPool.ObjectInfo(id.String()) if err != nil { - return "", err - } - defer cancel() - _, err = batch.Writer().Write([]byte(id.String() + "\n")) - if err != nil { - return "", err - } - _, typ, _, err := ReadBatchLine(batch.Reader()) - if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{ID: id.String()} } return "", err } - return typ, nil + return objInfo.Type, nil } func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { @@ -87,26 +79,17 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { return tag, nil } - // The tag is an annotated tag with a message. - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectInfo, contentReader, err := repo.objectPool.Object(tagID.String()) if err != nil { - return nil, err - } - defer cancel() - - rd := batch.Reader() - if _, err := batch.Writer().Write([]byte(tagID.String() + "\n")); err != nil { - return nil, err - } - _, typ, size, err := ReadBatchLine(rd) - if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - if typ != "tag" { - if err := DiscardFull(rd, size+1); err != nil { + defer contentReader.Close() + + if objectInfo.Type != "tag" { + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ID: tagID.String()} @@ -114,11 +97,11 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - _, err = rd.Discard(1) + _, err = contentReader.Discard(1) if err != nil { return nil, err } diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index e6e2ee9fa0655..46c63f1480604 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -7,71 +7,70 @@ package git import ( "io" + + "code.gitea.io/gitea/modules/git/catfile" ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) - if err != nil { - return nil, err - } - defer cancel() - - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(id.String() + "\n")) - - // ignore the SHA - _, typ, size, err := ReadBatchLine(rd) + objectInfo, contentReader, err := repo.objectPool.Object(id.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: id.String(), + } + } return nil, err } - switch typ { + switch objectInfo.Type { case "tag": resolvedID := id - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(contentReader, objectInfo.Size)) if err != nil { + contentReader.Close() return nil, err } tag, err := parseTagData(id.Type(), data) if err != nil { + contentReader.Close() return nil, err } + contentReader.Close() // close reader to avoid open a new process in the same goroutine - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommit(tag.Object) if err != nil { return nil, err } commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + defer contentReader.Close() + commit, err := CommitFromReader(repo, id, io.LimitReader(contentReader, objectInfo.Size)) if err != nil { return nil, err } - if _, err := rd.Discard(1); err != nil { + if _, err := contentReader.Discard(1); err != nil { return nil, err } commit.Tree.ResolvedID = commit.ID return &commit.Tree, nil case "tree": + defer contentReader.Close() tree := NewTree(repo, id) tree.ResolvedID = id objectFormat, err := repo.GetObjectFormat() if err != nil { return nil, err } - tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, size) + tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, contentReader, objectInfo.Size) if err != nil { return nil, err } tree.entriesParsed = true return tree, nil default: - if err := DiscardFull(rd, size+1); err != nil { + defer contentReader.Close() + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry.go b/modules/git/tree_entry.go index e7e4ea2d5bdb2..b50a0f0d52727 100644 --- a/modules/git/tree_entry.go +++ b/modules/git/tree_entry.go @@ -126,13 +126,13 @@ func EntryFollowLinks(commit *Commit, firstFullPath string, firstTreeEntry *Tree } // returns the Tree pointed to by this TreeEntry, or nil if this is not a tree -func (te *TreeEntry) Tree() *Tree { +func (te *TreeEntry) Tree() (*Tree, error) { t, err := te.ptree.repo.getTree(te.ID) if err != nil { - return nil + return nil, err } t.ptree = te.ptree - return t + return t, nil } // GetSubJumpablePathName return the full path of subdirectory jumpable ( contains only one directory ) diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 0ea7aeed9d44c..970b3664415ba 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -15,23 +15,12 @@ func (te *TreeEntry) Size() int64 { return te.size } - batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + objInfo, err := te.ptree.repo.objectPool.ObjectInfo(te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - defer cancel() - _, err = batch.Writer().Write([]byte(te.ID.String() + "\n")) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - _, _, te.size, err = ReadBatchLine(batch.Reader()) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - + te.size = objInfo.Size te.sized = true return te.size } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b8561dd3523e5..ed2521152086f 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -9,6 +9,7 @@ import ( "io" "strings" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -27,32 +28,35 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + objectInfo, contentReader, err := t.repo.objectPool.Object(t.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: t.ID.String(), + } + } return nil, err } - defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) - _, typ, sz, err := ReadBatchLine(rd) - if err != nil { - return nil, err - } - if typ == "commit" { - treeID, err := ReadTreeID(rd, sz) + if objectInfo.Type == "commit" { + treeID, err := catfile.ReadTreeID(contentReader, objectInfo.Size) + contentReader.Close() // close reader to avoid open a new process in the same goroutine if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) - _, typ, sz, err = ReadBatchLine(rd) + objectInfo, contentReader, err = t.repo.objectPool.Object(treeID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: treeID, + } + } return nil, err } + defer contentReader.Close() } - if typ == "tree" { - t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, sz) + if objectInfo.Type == "tree" { + t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, contentReader, objectInfo.Size) if err != nil { return nil, err } @@ -61,7 +65,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := DiscardFull(rd, sz+1); err != nil { + if err := catfile.DiscardFull(contentReader, objectInfo.Size+1); err != nil { return nil, err } } diff --git a/modules/gitrepo/cat_file.go b/modules/gitrepo/cat_file.go index 0e5fc9951c3ba..91e78b8021cb1 100644 --- a/modules/gitrepo/cat_file.go +++ b/modules/gitrepo/cat_file.go @@ -9,6 +9,6 @@ import ( "code.gitea.io/gitea/modules/git/catfile" ) -func NewBatch(ctx context.Context, repo Repository) (catfile.Batch, error) { - return catfile.NewBatch(ctx, repoPath(repo)) +func NewObjectPool(ctx context.Context, repo Repository) (catfile.ObjectPool, error) { + return catfile.NewBatchObjectPool(ctx, repoPath(repo)) } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index a3727bd0cbd99..a448ca15921e7 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -151,7 +151,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,17 +177,18 @@ func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, com return b.addDelete(update.Filename, repo, batch) } - if _, err := catfileBatch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return err - } - - batchReader := catfileBatch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + objectInfo, contentReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return git.ErrNotExist{ID: update.BlobSha} + } return err } + defer contentReader.Close() + + size = objectInfo.Size - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(contentReader, size)) if err != nil { return err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -196,7 +197,7 @@ func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, com fileContents = nil } - if _, err = batchReader.Discard(1); err != nil { + if _, err = contentReader.Discard(1); err != nil { return err } id := internal.FilenameIndexerID(repo.ID, update.Filename) @@ -219,18 +220,18 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - catfileBatch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer catfileBatch.Close() + defer objectPool.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, catfileBatch, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, objectPool, sha, update, repo, batch); err != nil { return err } } - catfileBatch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 653df0bd1102d..c52eb1f5331f5 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -139,7 +139,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -162,17 +162,18 @@ func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return nil, err - } - - batchReader := batch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + object, contentReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: update.BlobSha} + } return nil, err } + defer contentReader.Close() + + size = object.Size - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(contentReader, size)) if err != nil { return nil, err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -180,7 +181,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string return nil, nil } - if _, err = batchReader.Discard(1); err != nil { + if _, err = contentReader.Discard(1); err != nil { return nil, err } id := internal.FilenameIndexerID(repo.ID, update.Filename) @@ -211,14 +212,13 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - batch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer batch.Close() - + defer objectPool.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, objectPool, sha, update, repo) if err != nil { return err } @@ -226,7 +226,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st reqs = append(reqs, updateReqs...) } } - batch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { diff --git a/routers/api/v1/repo/hook_test.go b/routers/api/v1/repo/hook_test.go index f8d61ccf00067..451a57effeeb1 100644 --- a/routers/api/v1/repo/hook_test.go +++ b/routers/api/v1/repo/hook_test.go @@ -20,7 +20,8 @@ func TestTestHook(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/wiki/_pages") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) TestHook(ctx) assert.Equal(t, http.StatusNoContent, ctx.Resp.WrittenStatus()) diff --git a/routers/api/v1/utils/hook_test.go b/routers/api/v1/utils/hook_test.go index e5e8ce07cecfe..99eaa90c5c5e9 100644 --- a/routers/api/v1/utils/hook_test.go +++ b/routers/api/v1/utils/hook_test.go @@ -20,7 +20,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -36,7 +37,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with invalid URL", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -52,7 +54,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with invalid webhook type", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ @@ -68,7 +71,8 @@ func TestTestHookValidation(t *testing.T) { t.Run("Test Validation with empty content type", func(t *testing.T) { ctx, _ := contexttest.MockAPIContext(t, "user2/repo1/hooks") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) checkCreateHookOption(ctx, &structs.CreateHookOption{ diff --git a/routers/web/repo/view_readme.go b/routers/web/repo/view_readme.go index f1fa5732f0a79..11461568fe680 100644 --- a/routers/web/repo/view_readme.go +++ b/routers/web/repo/view_readme.go @@ -93,9 +93,9 @@ func findReadmeFileInEntries(ctx *context.Context, parentDir string, entries []* if subTreeEntry == nil { continue } - subTree := subTreeEntry.Tree() - if subTree == nil { - // this should be impossible; if subTreeEntry exists so should this. + subTree, err := subTreeEntry.Tree() + if err != nil { // this should be impossible; if subTreeEntry exists so should this. + log.Error("Unable to get tree for %s: %v", subTreeEntry.Name(), err) continue } childEntries, err := subTree.ListEntries() diff --git a/services/contexttest/context_tests.go b/services/contexttest/context_tests.go index 44d9f4a70f0ec..acac28d815ac9 100644 --- a/services/contexttest/context_tests.go +++ b/services/contexttest/context_tests.go @@ -130,7 +130,7 @@ func LoadRepo(t *testing.T, ctx gocontext.Context, repoID int64) { } // LoadRepoCommit loads a repo's commit into a test context. -func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { +func LoadRepoCommit(t *testing.T, ctx gocontext.Context) func() { var repo *context.Repository switch ctx := ctx.(type) { case *context.Context: @@ -143,7 +143,12 @@ func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { gitRepo, err := gitrepo.OpenRepository(ctx, repo.Repository) require.NoError(t, err) - defer gitRepo.Close() + // we can't close gitrepo here because it will be stored in Commit and be used by other places + cancel := func() { + if gitRepo != nil { + gitRepo.Close() + } + } if repo.RefFullName == "" { repo.RefFullName = git_module.RefNameFromBranch(repo.Repository.DefaultBranch) @@ -153,6 +158,7 @@ func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { } repo.Commit, err = gitRepo.GetCommit(repo.RefFullName.String()) require.NoError(t, err) + return cancel } // LoadUser load a user into a test context diff --git a/services/repository/files/content_test.go b/services/repository/files/content_test.go index d72f918074d16..f551cfdcec1ff 100644 --- a/services/repository/files/content_test.go +++ b/services/repository/files/content_test.go @@ -25,7 +25,8 @@ func TestGetContents(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) diff --git a/services/repository/files/diff_test.go b/services/repository/files/diff_test.go index ae702e4189804..6b492b383a70f 100644 --- a/services/repository/files/diff_test.go +++ b/services/repository/files/diff_test.go @@ -20,7 +20,8 @@ func TestGetDiffPreview(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -137,7 +138,8 @@ func TestGetDiffPreviewErrors(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() diff --git a/services/repository/files/tree.go b/services/repository/files/tree.go index e481a3e7d2672..3f164ee7c29b9 100644 --- a/services/repository/files/tree.go +++ b/services/repository/files/tree.go @@ -207,7 +207,11 @@ func listTreeNodes(ctx context.Context, repoLink string, renderedIconPool *filei if subTreePath[0] == '/' { subTreePath = subTreePath[1:] } - subNodes, err := listTreeNodes(ctx, repoLink, renderedIconPool, commit, entry.Tree(), subTreePath, subPathRemaining) + tree, err := entry.Tree() + if err != nil { + return nil, err + } + subNodes, err := listTreeNodes(ctx, repoLink, renderedIconPool, commit, tree, subTreePath, subPathRemaining) if err != nil { log.Error("listTreeNodes: %v", err) } else { @@ -224,5 +228,9 @@ func GetTreeViewNodes(ctx context.Context, repoLink string, renderedIconPool *fi if err != nil { return nil, err } - return listTreeNodes(ctx, repoLink, renderedIconPool, commit, entry.Tree(), treePath, subPath) + tree, err := entry.Tree() + if err != nil { + return nil, err + } + return listTreeNodes(ctx, repoLink, renderedIconPool, commit, tree, treePath, subPath) } diff --git a/services/repository/files/tree_test.go b/services/repository/files/tree_test.go index e7511b3eed446..a3a2d864e9d0d 100644 --- a/services/repository/files/tree_test.go +++ b/services/repository/files/tree_test.go @@ -20,7 +20,8 @@ func TestGetTreeBySHA(t *testing.T) { unittest.PrepareTestEnv(t) ctx, _ := contexttest.MockContext(t, "user2/repo1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -59,7 +60,8 @@ func TestGetTreeViewNodes(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.Repo.RefFullName = git.RefNameFromBranch("sub-home-md-img-check") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() diff --git a/tests/integration/repofiles_change_test.go b/tests/integration/repofiles_change_test.go index 6fd42401c5259..3498a4c35b6fc 100644 --- a/tests/integration/repofiles_change_test.go +++ b/tests/integration/repofiles_change_test.go @@ -349,7 +349,8 @@ func TestChangeRepoFilesForCreate(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -386,7 +387,8 @@ func TestChangeRepoFilesForUpdate(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -420,7 +422,8 @@ func TestChangeRepoFilesForUpdateWithFileMove(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -469,7 +472,8 @@ func TestChangeRepoFilesForUpdateWithFileRename(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/lfs") ctx.SetPathParam("id", "54") contexttest.LoadRepo(t, ctx, 54) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -503,7 +507,8 @@ func TestChangeRepoFilesWithoutBranchNames(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -534,7 +539,8 @@ func TestChangeRepoFilesForDelete(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() @@ -621,7 +627,8 @@ func TestChangeRepoFilesErrors(t *testing.T) { ctx, _ := contexttest.MockContext(t, "user2/repo1") ctx.SetPathParam("id", "1") contexttest.LoadRepo(t, ctx, 1) - contexttest.LoadRepoCommit(t, ctx) + cancel := contexttest.LoadRepoCommit(t, ctx) + defer cancel() contexttest.LoadUser(t, ctx, 2) contexttest.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close()