From 2bf7d2d8fbd39e1e3181d18e586090e402ff472a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 4 Jan 2026 22:55:12 -0800 Subject: [PATCH 1/3] Add object pool interface and refactor related code --- modules/git/blob_nogogit.go | 31 ++---- modules/git/catfile/batch.go | 57 ---------- modules/git/catfile/object_pool.go | 30 ++++++ modules/git/catfile/object_pool_cmd.go | 102 ++++++++++++++++++ .../languagestats/language_stats_nogogit.go | 32 +++--- modules/git/pipeline/lfs_nogogit.go | 49 +++------ modules/git/repo_base_nogogit.go | 16 +-- modules/git/repo_branch_nogogit.go | 20 ++-- modules/git/repo_commit_nogogit.go | 56 ++++------ modules/git/repo_tag_nogogit.go | 26 ++--- modules/git/repo_tree_nogogit.go | 26 ++--- modules/git/tree_entry_nogogit.go | 11 +- modules/git/tree_nogogit.go | 23 ++-- modules/gitrepo/cat_file.go | 4 +- modules/indexer/code/bleve/bleve.go | 20 ++-- .../code/elasticsearch/elasticsearch.go | 22 ++-- 16 files changed, 260 insertions(+), 265 deletions(-) create mode 100644 modules/git/catfile/object_pool.go create mode 100644 modules/git/catfile/object_pool_cmd.go diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 88e2be792bce9..ccede82545084 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -26,27 +26,23 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. func (b *Blob) DataAsync() (io.ReadCloser, error) { - batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) + objectPool, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } - rd := batch.Reader() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - cancel() - return nil, err - } - _, _, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(b.repo.Ctx, b.ID.String()) if err != nil { cancel() return nil, err } + + rd := object.Reader b.gotSize = true - b.size = size + b.size = object.Size - if size < 4096 { - bs, err := io.ReadAll(io.LimitReader(rd, size)) + if b.size < 4096 { + bs, err := io.ReadAll(io.LimitReader(rd, b.size)) defer cancel() if err != nil { return nil, err @@ -57,7 +53,7 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { return &blobReader{ rd: rd, - n: size, + n: b.size, cancel: cancel, }, nil } @@ -68,25 +64,20 @@ func (b *Blob) Size() int64 { return b.size } - batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + objInfoPool, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - _, _, b.size, err = ReadBatchLine(batch.Reader()) + objInfo, err := objInfoPool.ObjectInfo(b.repo.Ctx, b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } + b.size = objInfo.Size b.gotSize = true - return b.size } diff --git a/modules/git/catfile/batch.go b/modules/git/catfile/batch.go index 1facb8946eb06..fd99f3e7af742 100644 --- a/modules/git/catfile/batch.go +++ b/modules/git/catfile/batch.go @@ -21,63 +21,6 @@ type WriteCloserError interface { CloseWithError(err error) error } -type Batch interface { - Writer() WriteCloserError - Reader() *bufio.Reader - Close() -} - -// batch represents an active `git cat-file --batch` or `--batch-check` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batch struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatch(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) - return &batch, nil -} - -// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatchCheck(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batch - check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func (b *batch) Writer() WriteCloserError { - return b.writer -} - -func (b *batch) Reader() *bufio.Reader { - return b.reader -} - -// Close stops the underlying git cat-file process and releases held resources. -func (b *batch) Close() { - if b == nil || b.cancel == nil { - return - } - b.cancel() - b.reader = nil - b.writer = nil - b.cancel = nil -} - // EnsureValidGitRepository runs `git rev-parse` in the repository path to make sure // the directory is a valid git repository. This avoids git cat-file hanging indefinitely // when invoked in invalid paths. diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go new file mode 100644 index 0000000000000..b48e27768a959 --- /dev/null +++ b/modules/git/catfile/object_pool.go @@ -0,0 +1,30 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" +) + +type ObjectInfo struct { + ID string + Type string + Size int64 +} + +type ObjectInfoPool interface { + ObjectInfo(ctx context.Context, sha string) (*ObjectInfo, error) + Close() +} + +type Object struct { + ObjectInfo + Reader *bufio.Reader +} + +type ObjectPool interface { + Object(ctx context.Context, sha string) (*Object, error) + Close() +} diff --git a/modules/git/catfile/object_pool_cmd.go b/modules/git/catfile/object_pool_cmd.go new file mode 100644 index 0000000000000..9e9750465b983 --- /dev/null +++ b/modules/git/catfile/object_pool_cmd.go @@ -0,0 +1,102 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package catfile + +import ( + "bufio" + "context" +) + +// batchCheck represents an active `git cat-file --batch-check` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batchCheck struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError +} + +// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. +// The returned Batch must be closed once the caller has finished with it. +func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, error) { + if err := EnsureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + var check batchCheck + check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) + return &check, nil +} + +func (b *batchCheck) ObjectInfo(ctx context.Context, refName string) (*ObjectInfo, error) { + _, err := b.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var objInfo ObjectInfo + var oid []byte + oid, objInfo.Type, objInfo.Size, err = ReadBatchLine(b.reader) + if err != nil { + return nil, err + } + objInfo.ID = string(oid) + return &objInfo, nil +} + +// Close stops the underlying git cat-file process and releases held resources. +func (b *batchCheck) Close() { + if b.cancel != nil { + b.cancel() + } + if b.writer != nil { + _ = b.writer.Close() + } +} + +// batch represents an active `git cat-file --batch` invocation +// paired with the pipes that feed/read from it. Call Close to release resources. +type batch struct { + cancel context.CancelFunc + reader *bufio.Reader + writer WriteCloserError +} + +// NewBatch creates a new cat-file --batch process for the provided repository path. +// The returned Batch must be closed once the caller has finished with it. +func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { + if err := EnsureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + + var batch batch + batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) + return &batch, nil +} + +func (b *batch) Object(ctx context.Context, refName string) (*Object, error) { + _, err := b.writer.Write([]byte(refName + "\n")) + if err != nil { + return nil, err + } + + var obj Object + var oid []byte + oid, obj.Type, obj.Size, err = ReadBatchLine(b.reader) + if err != nil { + return nil, err + } + obj.ID = string(oid) + obj.Reader = b.reader + + return &obj, nil +} + +func (b *batch) Close() { + if b.cancel != nil { + b.cancel() + } + if b.writer != nil { + _ = b.writer.Close() + } +} diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index da291ae8481d7..3597a0960d1ca 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -22,34 +22,30 @@ import ( func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) { // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - writeID := func(id string) error { - _, err := batch.Writer().Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { + object, err := objectPool.Object(repo.Ctx, commitID) + if err != nil { return nil, err } - batchReader := batch.Reader() - shaBytes, typ, size, err := git.ReadBatchLine(batchReader) - if typ != "commit" { + if object.Type != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - sha, err := git.NewIDFromString(string(shaBytes)) + batchReader := object.Reader + + sha, err := git.NewIDFromString(object.ID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) + commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, object.Size)) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, err @@ -145,20 +141,18 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { - return nil, err - } - _, _, size, err := git.ReadBatchLine(batchReader) + object, err := objectPool.Object(repo.Ctx, f.ID.String()) if err != nil { log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } + batchReader := object.Reader - sizeToRead := size + sizeToRead := object.Size discard := int64(1) - if size > fileSizeLimit { + if object.Size > fileSizeLimit { sizeToRead = fileSizeLimit - discard = size - fileSizeLimit + 1 + discard = object.Size - fileSizeLimit + 1 } _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 6f1a860c1dba7..51f0cf26407d2 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -47,15 +47,12 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err // Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. // so let's create a batch stdin and stdout - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - batchStdinWriter := batch.Writer() - batchReader := batch.Reader() - // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) trees := [][]byte{} @@ -67,43 +64,32 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() - - // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) - if err != nil { - return nil, err - } + commitID := scan.Text() var curCommit *git.Commit curPath := "" commitReadingLoop: for { - _, typ, size, err := git.ReadBatchLine(batchReader) + object, err := objectPool.Object(repo.Ctx, commitID) if err != nil { return nil, err } - switch typ { + batchReader := object.Reader + + switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, size) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte(id + "\n")) + id, err := git.ReadTagObjectID(batchReader, object.Size) if err != nil { return nil, err } + commitID = id continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, object.Size)) if err != nil { return nil, err } @@ -111,13 +97,11 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { - return nil, err - } + commitID = curCommit.Tree.ID.String() curPath = "" case "tree": var n int64 - for n < size { + for n < object.Size { mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { return nil, err @@ -143,14 +127,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) - if err != nil { - return nil, err - } + commitID = string(trees[len(trees)-1]) curPath = paths[len(paths)-1] trees = trees[:len(trees)-1] paths = paths[:len(paths)-1] @@ -158,7 +135,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err break commitReadingLoop } default: - if err := git.DiscardFull(batchReader, size+1); err != nil { + if err := git.DiscardFull(batchReader, object.Size+1); err != nil { return nil, err } } diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 97a43b90fd3a2..fc06b7e7afa00 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -24,10 +24,10 @@ type Repository struct { tagCache *ObjectCache[*Tag] batchInUse bool - batch catfile.Batch + batch catfile.ObjectPool checkInUse bool - check catfile.Batch + check catfile.ObjectInfoPool Ctx context.Context LastCommitCache *LastCommitCache @@ -57,10 +57,10 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { } // CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func(), error) { +func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.ObjectPool, func(), error) { if repo.batch == nil { var err error - repo.batch, err = catfile.NewBatch(ctx, repo.Path) + repo.batch, err = catfile.NewObjectPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -74,7 +74,7 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func() } log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := catfile.NewBatch(ctx, repo.Path) + tempBatch, err := catfile.NewObjectPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -82,10 +82,10 @@ func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func() } // CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, func(), error) { +func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.ObjectInfoPool, func(), error) { if repo.check == nil { var err error - repo.check, err = catfile.NewBatchCheck(ctx, repo.Path) + repo.check, err = catfile.NewObjectInfoPool(ctx, repo.Path) if err != nil { return nil, nil, err } @@ -99,7 +99,7 @@ func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, f } log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := catfile.NewBatchCheck(ctx, repo.Path) + tempBatchCheck, err := catfile.NewObjectInfoPool(ctx, repo.Path) if err != nil { return nil, nil, err } diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 09873fb2c626d..1305edd60a1e4 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -8,7 +8,6 @@ package git import ( "bufio" - "bytes" "context" "io" "strings" @@ -23,19 +22,18 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Debug("Error writing to ObjectInfo %v", err) return false } - sha, _, _, err := ReadBatchLine(batch.Reader()) - return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) + return strings.HasPrefix(objInfo.ID, strings.TrimSpace(name)) } // IsReferenceExist returns true if given reference exists in the repository. @@ -44,18 +42,14 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { log.Debug("Error writing to CatFileBatchCheck %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - _, _, _, err = ReadBatchLine(batch.Reader()) + + _, err = objInfoPool.ObjectInfo(repo.Ctx, name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index a3d728eb6d05b..02f35a9895e11 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -37,21 +37,20 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) if err != nil { + if IsErrNotExist(err) { + return "", ErrNotExist{name, ""} + } return "", err } - shaBs, _, _, err := ReadBatchLine(batch.Reader()) - if IsErrNotExist(err) { - return "", ErrNotExist{name, ""} - } - - return string(shaBs), nil + return objInfo.ID, nil } // IsCommitExist returns true if given commit exists in current repository. @@ -68,20 +67,17 @@ func (repo *Repository) IsCommitExist(name string) bool { } func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, _ = batch.Writer().Write([]byte(id.String() + "\n")) - - return repo.getCommitFromBatchReader(batch, id) + return repo.getCommitFromBatchReader(objectPool, id) } -func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectID) (*Commit, error) { - rd := batch.Reader() - _, typ, size, err := ReadBatchLine(rd) +func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, id ObjectID) (*Commit, error) { + object, err := objectPool.Object(repo.Ctx, id.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: id.String()} @@ -89,13 +85,15 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return nil, err } - switch typ { + rd := object.Reader + + switch object.Type { case "missing": return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -108,18 +106,14 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return nil, err } - if _, err := batch.Writer().Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) if err != nil { return nil, err } return commit, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -130,8 +124,8 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return commit, nil default: - log.Debug("Unknown typ: %s", typ) - if err := DiscardFull(rd, size+1); err != nil { + log.Debug("Unknown typ: %s", object.Type) + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -153,22 +147,18 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = batch.Writer().Write([]byte(commitID + "\n")) - if err != nil { - return nil, err - } - sha, _, _, err := ReadBatchLine(batch.Reader()) + + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, commitID) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} } return nil, err } - - return MustIDFromString(string(sha)), nil + return MustIDFromString(objInfo.ID), nil } diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 88d9edcbd88b5..6d7ce5c385045 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -24,23 +24,19 @@ func (repo *Repository) IsTagExist(name string) bool { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + objInfoPool, cancel, err := repo.CatFileBatchCheck(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(id.String() + "\n")) - if err != nil { - return "", err - } - _, typ, _, err := ReadBatchLine(batch.Reader()) + objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, id.String()) if err != nil { if IsErrNotExist(err) { return "", ErrNotExist{ID: id.String()} } return "", err } - return typ, nil + return objInfo.Type, nil } func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { @@ -88,25 +84,23 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } // The tag is an annotated tag with a message. - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - rd := batch.Reader() - if _, err := batch.Writer().Write([]byte(tagID.String() + "\n")); err != nil { - return nil, err - } - _, typ, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(repo.Ctx, tagID.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - if typ != "tag" { - if err := DiscardFull(rd, size+1); err != nil { + + rd := object.Reader + if object.Type != "tag" { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ID: tagID.String()} @@ -114,7 +108,7 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index e6e2ee9fa0655..5a0b93bf7cb27 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -10,26 +10,23 @@ import ( ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { - batch, cancel, err := repo.CatFileBatch(repo.Ctx) + objectPool, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(id.String() + "\n")) - - // ignore the SHA - _, typ, size, err := ReadBatchLine(rd) + object, err := objectPool.Object(repo.Ctx, id.String()) if err != nil { return nil, err } - switch typ { + rd := object.Reader + + switch object.Type { case "tag": resolvedID := id - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -38,17 +35,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { return nil, err } - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommitFromBatchReader(objectPool, tag.Object) if err != nil { return nil, err } commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, object.Size)) if err != nil { return nil, err } @@ -64,14 +58,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { if err != nil { return nil, err } - tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, size) + tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, object.Size) if err != nil { return nil, err } tree.entriesParsed = true return tree, nil default: - if err := DiscardFull(rd, size+1); err != nil { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 0ea7aeed9d44c..90fea1dbd9c55 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -15,23 +15,18 @@ func (te *TreeEntry) Size() int64 { return te.size } - batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + objInfoPool, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(te.ID.String() + "\n")) + objInfo, err := objInfoPool.ObjectInfo(te.ptree.repo.Ctx, te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } - _, _, te.size, err = ReadBatchLine(batch.Reader()) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - + te.size = objInfo.Size te.sized = true return te.size } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b8561dd3523e5..94e6d0357a275 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -27,32 +27,31 @@ func (t *Tree) ListEntries() (Entries, error) { } if t.repo != nil { - batch, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) + objectPool, cancel, err := t.repo.CatFileBatch(t.repo.Ctx) if err != nil { return nil, err } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) - _, typ, sz, err := ReadBatchLine(rd) + object, err := objectPool.Object(t.repo.Ctx, t.ID.String()) if err != nil { return nil, err } - if typ == "commit" { - treeID, err := ReadTreeID(rd, sz) + + rd := object.Reader + + if object.Type == "commit" { + treeID, err := ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) - _, typ, sz, err = ReadBatchLine(rd) + object, err = objectPool.Object(t.repo.Ctx, treeID) if err != nil { return nil, err } } - if typ == "tree" { - t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, sz) + if object.Type == "tree" { + t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, object.Size) if err != nil { return nil, err } @@ -61,7 +60,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := DiscardFull(rd, sz+1); err != nil { + if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err } } diff --git a/modules/gitrepo/cat_file.go b/modules/gitrepo/cat_file.go index 0e5fc9951c3ba..d02421d9b780a 100644 --- a/modules/gitrepo/cat_file.go +++ b/modules/gitrepo/cat_file.go @@ -9,6 +9,6 @@ import ( "code.gitea.io/gitea/modules/git/catfile" ) -func NewBatch(ctx context.Context, repo Repository) (catfile.Batch, error) { - return catfile.NewBatch(ctx, repoPath(repo)) +func NewObjectPool(ctx context.Context, repo Repository) (catfile.ObjectPool, error) { + return catfile.NewObjectPool(ctx, repoPath(repo)) } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index a3727bd0cbd99..48b7c632db6bd 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -14,7 +14,6 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" - "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -151,7 +150,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,16 +176,13 @@ func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, com return b.addDelete(update.Filename, repo, batch) } - if _, err := catfileBatch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return err - } - - batchReader := catfileBatch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + object, err := objectPool.Object(ctx, update.BlobSha) if err != nil { return err } + batchReader := object.Reader + fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return err @@ -219,18 +215,18 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { - catfileBatch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer catfileBatch.Close() + defer objectPool.Close() for _, update := range changes.Updates { - if err := b.addUpdate(ctx, catfileBatch, sha, update, repo, batch); err != nil { + if err := b.addUpdate(ctx, objectPool, sha, update, repo, batch); err != nil { return err } } - catfileBatch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 653df0bd1102d..2a48d4f6568cc 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -13,7 +13,6 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" - "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -139,7 +138,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -162,15 +161,13 @@ func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return nil, err - } - - batchReader := batch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + object, err := objectPool.Object(ctx, update.BlobSha) if err != nil { return nil, err } + size = object.Size + + batchReader := object.Reader fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { @@ -211,14 +208,13 @@ func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elasti func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { - batch, err := gitrepo.NewBatch(ctx, repo) + objectPool, err := gitrepo.NewObjectPool(ctx, repo) if err != nil { return err } - defer batch.Close() - + defer objectPool.Close() for _, update := range changes.Updates { - updateReqs, err := b.addUpdate(ctx, batch, sha, update, repo) + updateReqs, err := b.addUpdate(ctx, objectPool, sha, update, repo) if err != nil { return err } @@ -226,7 +222,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st reqs = append(reqs, updateReqs...) } } - batch.Close() + objectPool.Close() } for _, filename := range changes.RemovedFilenames { From dac0492b8eab3540b6e2872a4bab76619839293f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 5 Jan 2026 14:22:29 -0800 Subject: [PATCH 2/3] some improvements --- modules/git/blob_nogogit.go | 9 ++++++--- modules/git/catfile/object_pool.go | 10 ++-------- modules/git/catfile/object_pool_cmd.go | 13 ++++++------- .../git/languagestats/language_stats_nogogit.go | 14 +++++++++----- modules/git/pipeline/lfs_nogogit.go | 8 +++++--- modules/git/repo_branch_nogogit.go | 4 ++-- modules/git/repo_commit_nogogit.go | 14 ++++++-------- modules/git/repo_tag_nogogit.go | 10 +++++----- modules/git/repo_tree_nogogit.go | 11 ++++++++--- modules/git/tree_entry_nogogit.go | 2 +- modules/git/tree_nogogit.go | 17 +++++++++++++---- modules/indexer/code/bleve/bleve.go | 8 ++++++-- .../indexer/code/elasticsearch/elasticsearch.go | 8 +++++--- 13 files changed, 74 insertions(+), 54 deletions(-) diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index ccede82545084..5f83740f6bf0c 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -10,6 +10,7 @@ import ( "bytes" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -31,13 +32,15 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) { return nil, err } - object, err := objectPool.Object(b.repo.Ctx, b.ID.String()) + object, rd, err := objectPool.Object(b.ID.String()) if err != nil { cancel() + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ID: b.ID.String()} + } return nil, err } - rd := object.Reader b.gotSize = true b.size = object.Size @@ -70,7 +73,7 @@ func (b *Blob) Size() int64 { return 0 } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(b.repo.Ctx, b.ID.String()) + objInfo, err := objInfoPool.ObjectInfo(b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 diff --git a/modules/git/catfile/object_pool.go b/modules/git/catfile/object_pool.go index b48e27768a959..9000e1f639b20 100644 --- a/modules/git/catfile/object_pool.go +++ b/modules/git/catfile/object_pool.go @@ -5,7 +5,6 @@ package catfile import ( "bufio" - "context" ) type ObjectInfo struct { @@ -15,16 +14,11 @@ type ObjectInfo struct { } type ObjectInfoPool interface { - ObjectInfo(ctx context.Context, sha string) (*ObjectInfo, error) + ObjectInfo(refName string) (*ObjectInfo, error) Close() } -type Object struct { - ObjectInfo - Reader *bufio.Reader -} - type ObjectPool interface { - Object(ctx context.Context, sha string) (*Object, error) + Object(refName string) (*ObjectInfo, *bufio.Reader, error) Close() } diff --git a/modules/git/catfile/object_pool_cmd.go b/modules/git/catfile/object_pool_cmd.go index 9e9750465b983..5495599a7eeb8 100644 --- a/modules/git/catfile/object_pool_cmd.go +++ b/modules/git/catfile/object_pool_cmd.go @@ -28,7 +28,7 @@ func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, er return &check, nil } -func (b *batchCheck) ObjectInfo(ctx context.Context, refName string) (*ObjectInfo, error) { +func (b *batchCheck) ObjectInfo(refName string) (*ObjectInfo, error) { _, err := b.writer.Write([]byte(refName + "\n")) if err != nil { return nil, err @@ -74,22 +74,21 @@ func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) { return &batch, nil } -func (b *batch) Object(ctx context.Context, refName string) (*Object, error) { +func (b *batch) Object(refName string) (*ObjectInfo, *bufio.Reader, error) { _, err := b.writer.Write([]byte(refName + "\n")) if err != nil { - return nil, err + return nil, nil, err } - var obj Object + var obj ObjectInfo var oid []byte oid, obj.Type, obj.Size, err = ReadBatchLine(b.reader) if err != nil { - return nil, err + return nil, nil, err } obj.ID = string(oid) - obj.Reader = b.reader - return &obj, nil + return &obj, b.reader, nil } func (b *batch) Close() { diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index 3597a0960d1ca..4bbf272be1820 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/attribute" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" @@ -28,8 +29,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, } defer cancel() - object, err := objectPool.Object(repo.Ctx, commitID) + object, batchReader, err := objectPool.Object(commitID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: commitID} + } return nil, err } if object.Type != "commit" { @@ -37,8 +41,6 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, return nil, git.ErrNotExist{ID: commitID} } - batchReader := object.Reader - sha, err := git.NewIDFromString(object.ID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) @@ -141,12 +143,14 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - object, err := objectPool.Object(repo.Ctx, f.ID.String()) + object, batchReader, err := objectPool.Object(f.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: f.ID.String()} + } log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } - batchReader := object.Reader sizeToRead := object.Size discard := int64(1) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 51f0cf26407d2..cc91b74b54a03 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -14,6 +14,7 @@ import ( "sync" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -71,13 +72,14 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err commitReadingLoop: for { - object, err := objectPool.Object(repo.Ctx, commitID) + object, batchReader, err := objectPool.Object(commitID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: commitID} + } return nil, err } - batchReader := object.Reader - switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 1305edd60a1e4..975bce9455cf4 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -28,7 +28,7 @@ func (repo *Repository) IsObjectExist(name string) bool { return false } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) + objInfo, err := objInfoPool.ObjectInfo(name) if err != nil { log.Debug("Error writing to ObjectInfo %v", err) return false @@ -49,7 +49,7 @@ func (repo *Repository) IsReferenceExist(name string) bool { } defer cancel() - _, err = objInfoPool.ObjectInfo(repo.Ctx, name) + _, err = objInfoPool.ObjectInfo(name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index 02f35a9895e11..572384563801d 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -43,9 +43,9 @@ func (repo *Repository) GetRefCommitID(name string) (string, error) { } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, name) + objInfo, err := objInfoPool.ObjectInfo(name) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{name, ""} } return "", err @@ -77,16 +77,14 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { } func (repo *Repository) getCommitFromBatchReader(objectPool catfile.ObjectPool, id ObjectID) (*Commit, error) { - object, err := objectPool.Object(repo.Ctx, id.String()) + object, rd, err := objectPool.Object(id.String()) if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: id.String()} } return nil, err } - rd := object.Reader - switch object.Type { case "missing": return nil, ErrNotExist{ID: id.String()} @@ -153,9 +151,9 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, commitID) + objInfo, err := objInfoPool.ObjectInfo(commitID) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{commitID, ""} } return nil, err diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 6d7ce5c385045..e089850212844 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -10,6 +10,7 @@ import ( "errors" "io" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" ) @@ -29,9 +30,9 @@ func (repo *Repository) GetTagType(id ObjectID) (string, error) { return "", err } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(repo.Ctx, id.String()) + objInfo, err := objInfoPool.ObjectInfo(id.String()) if err != nil { - if IsErrNotExist(err) { + if catfile.IsErrObjectNotFound(err) { return "", ErrNotExist{ID: id.String()} } return "", err @@ -90,15 +91,14 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } defer cancel() - object, err := objectPool.Object(repo.Ctx, tagID.String()) + object, rd, err := objectPool.Object(tagID.String()) if err != nil { - if errors.Is(err, io.EOF) || IsErrNotExist(err) { + if errors.Is(err, io.EOF) || catfile.IsErrObjectNotFound(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } - rd := object.Reader if object.Type != "tag" { if err := DiscardFull(rd, object.Size+1); err != nil { return nil, err diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index 5a0b93bf7cb27..13981d532d37e 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -7,6 +7,8 @@ package git import ( "io" + + "code.gitea.io/gitea/modules/git/catfile" ) func (repo *Repository) getTree(id ObjectID) (*Tree, error) { @@ -16,13 +18,16 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { } defer cancel() - object, err := objectPool.Object(repo.Ctx, id.String()) + object, rd, err := objectPool.Object(id.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: id.String(), + } + } return nil, err } - rd := object.Reader - switch object.Type { case "tag": resolvedID := id diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 90fea1dbd9c55..73b345868d17b 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -21,7 +21,7 @@ func (te *TreeEntry) Size() int64 { return 0 } defer cancel() - objInfo, err := objInfoPool.ObjectInfo(te.ptree.repo.Ctx, te.ID.String()) + objInfo, err := objInfoPool.ObjectInfo(te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index 94e6d0357a275..b552a758926b7 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -9,6 +9,7 @@ import ( "io" "strings" + "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" ) @@ -33,20 +34,28 @@ func (t *Tree) ListEntries() (Entries, error) { } defer cancel() - object, err := objectPool.Object(t.repo.Ctx, t.ID.String()) + object, rd, err := objectPool.Object(t.ID.String()) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: t.ID.String(), + } + } return nil, err } - rd := object.Reader - if object.Type == "commit" { treeID, err := ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err } - object, err = objectPool.Object(t.repo.Ctx, treeID) + object, rd, err = objectPool.Object(treeID) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, ErrNotExist{ + ID: treeID, + } + } return nil, err } } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 48b7c632db6bd..4b1278774f22f 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -14,6 +14,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -176,12 +177,15 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return b.addDelete(update.Filename, repo, batch) } - object, err := objectPool.Object(ctx, update.BlobSha) + object, batchReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return git.ErrNotExist{ID: update.BlobSha} + } return err } - batchReader := object.Reader + size = object.Size fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 2a48d4f6568cc..abb5495655d41 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -13,6 +13,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" @@ -161,14 +162,15 @@ func (b *Indexer) addUpdate(ctx context.Context, objectPool catfile.ObjectPool, return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - object, err := objectPool.Object(ctx, update.BlobSha) + object, batchReader, err := objectPool.Object(update.BlobSha) if err != nil { + if catfile.IsErrObjectNotFound(err) { + return nil, git.ErrNotExist{ID: update.BlobSha} + } return nil, err } size = object.Size - batchReader := object.Reader - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) if err != nil { return nil, err From 67847fbbea6a33b7541b57eb7f81577ab8a40528 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 5 Jan 2026 14:31:42 -0800 Subject: [PATCH 3/3] remove unnecessary functions --- modules/git/batch_reader.go | 21 --------------------- modules/git/pipeline/lfs_nogogit.go | 4 ++-- modules/git/tree_nogogit.go | 2 +- 3 files changed, 3 insertions(+), 24 deletions(-) diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go index 3d612f5549e30..ae4f49dabe6f2 100644 --- a/modules/git/batch_reader.go +++ b/modules/git/batch_reader.go @@ -10,27 +10,6 @@ import ( "code.gitea.io/gitea/modules/git/catfile" ) -// ReadBatchLine reads the header line from cat-file --batch while preserving the traditional return signature. -func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { - sha, typ, size, err = catfile.ReadBatchLine(rd) - return sha, typ, size, convertCatfileError(err, sha) -} - -// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTagObjectID(rd, size) -} - -// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTreeID(rd, size) -} - -// BinToHex converts a binary hash into a hex encoded one. -func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { - return catfile.BinToHex(objectFormat, sha, out) -} - // ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream. func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index cc91b74b54a03..b28a816d0df13 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -83,7 +83,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err switch object.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, object.Size) + id, err := catfile.ReadTagObjectID(batchReader, object.Size) if err != nil { return nil, err } @@ -120,7 +120,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result } else if string(mode) == git.EntryModeTree.String() { hexObjectID := make([]byte, objectID.Type().FullLength()) - git.BinToHex(objectID.Type(), binObjectID, hexObjectID) + catfile.BinToHex(objectID.Type(), binObjectID, hexObjectID) trees = append(trees, hexObjectID) paths = append(paths, curPath+string(fname)+"/") } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b552a758926b7..f975b23153de6 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -45,7 +45,7 @@ func (t *Tree) ListEntries() (Entries, error) { } if object.Type == "commit" { - treeID, err := ReadTreeID(rd, object.Size) + treeID, err := catfile.ReadTreeID(rd, object.Size) if err != nil && err != io.EOF { return nil, err }