Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,6 @@ import (
"code.gitea.io/gitea/modules/git/catfile"
)

// ReadBatchLine reads one header line of `git cat-file --batch` output from rd
// by delegating to catfile.ReadBatchLine, keeping the traditional
// (sha, typ, size, err) return signature.
//
// The error from catfile is passed through convertCatfileError together with
// the sha before being returned (presumably to translate catfile errors into
// this package's error values; confirm against convertCatfileError).
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
	sha, typ, size, err = catfile.ReadBatchLine(rd)
	err = convertCatfileError(err, sha)
	return sha, typ, size, err
}

// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
//
// It is a thin wrapper that forwards rd and size unchanged to
// catfile.ReadTagObjectID and returns its results as-is.
func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
return catfile.ReadTagObjectID(rd, size)
}

// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
//
// It is a thin wrapper that forwards rd and size unchanged to
// catfile.ReadTreeID and returns its results as-is.
func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
return catfile.ReadTreeID(rd, size)
}

// BinToHex converts a binary hash into a hex encoded one.
//
// It is a thin wrapper that forwards objectFormat, sha and out unchanged to
// catfile.BinToHex and returns the slice that call produces.
func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
return catfile.BinToHex(objectFormat, sha, out)
}

// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream.
func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf)
Expand Down
34 changes: 14 additions & 20 deletions modules/git/blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"bytes"
"io"

"code.gitea.io/gitea/modules/git/catfile"
"code.gitea.io/gitea/modules/log"
)

Expand All @@ -26,27 +27,25 @@ type Blob struct {
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
objectPool, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
return nil, err
}

rd := batch.Reader()
_, err = batch.Writer().Write([]byte(b.ID.String() + "\n"))
if err != nil {
cancel()
return nil, err
}
_, _, size, err := ReadBatchLine(rd)
object, rd, err := objectPool.Object(b.ID.String())
if err != nil {
cancel()
if catfile.IsErrObjectNotFound(err) {
return nil, ErrNotExist{ID: b.ID.String()}
}
return nil, err
}

b.gotSize = true
b.size = size
b.size = object.Size

if size < 4096 {
bs, err := io.ReadAll(io.LimitReader(rd, size))
if b.size < 4096 {
bs, err := io.ReadAll(io.LimitReader(rd, b.size))
defer cancel()
if err != nil {
return nil, err
Expand All @@ -57,7 +56,7 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) {

return &blobReader{
rd: rd,
n: size,
n: b.size,
cancel: cancel,
}, nil
}
Expand All @@ -68,25 +67,20 @@ func (b *Blob) Size() int64 {
return b.size
}

batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
objInfoPool, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
defer cancel()
_, err = batch.Writer().Write([]byte(b.ID.String() + "\n"))
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
_, _, b.size, err = ReadBatchLine(batch.Reader())
objInfo, err := objInfoPool.ObjectInfo(b.ID.String())
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}

b.size = objInfo.Size
b.gotSize = true

return b.size
}

Expand Down
57 changes: 0 additions & 57 deletions modules/git/catfile/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,63 +21,6 @@ type WriteCloserError interface {
CloseWithError(err error) error
}

// Batch gives access to the two pipe ends of a running `git cat-file`
// process and a way to shut that process down.
type Batch interface {
// Writer returns the write end; callers write requests (for example an
// object ID followed by "\n") to it.
Writer() WriteCloserError
// Reader returns the buffered read end from which responses are read.
Reader() *bufio.Reader
// Close releases the resources held by the Batch.
Close()
}

// batch represents an active `git cat-file --batch` or `--batch-check` invocation
// paired with the pipes that feed/read from it. Call Close to release resources.
type batch struct {
// cancel is invoked by Close to stop the underlying process.
cancel context.CancelFunc
// reader is the buffered read end handed out by Reader.
reader *bufio.Reader
// writer is the write end handed out by Writer.
writer WriteCloserError
}

// NewBatch validates repoPath with EnsureValidGitRepository and, if that
// succeeds, creates a new cat-file --batch process for the repository.
// A validation failure is returned as-is and no process is created.
// The returned Batch must be closed once the caller has finished with it.
func NewBatch(ctx context.Context, repoPath string) (Batch, error) {
	err := EnsureValidGitRepository(ctx, repoPath)
	if err != nil {
		return nil, err
	}

	writer, reader, cancel := catFileBatch(ctx, repoPath)
	return &batch{
		cancel: cancel,
		reader: reader,
		writer: writer,
	}, nil
}

// NewBatchCheck validates repoPath with EnsureValidGitRepository and, if that
// succeeds, creates a cat-file --batch-check process for the repository.
// A validation failure is returned as-is and no process is created.
// The returned Batch must be closed once the caller has finished with it.
func NewBatchCheck(ctx context.Context, repoPath string) (Batch, error) {
	err := EnsureValidGitRepository(ctx, repoPath)
	if err != nil {
		return nil, err
	}

	writer, reader, cancel := catFileBatchCheck(ctx, repoPath)
	return &batch{
		cancel: cancel,
		reader: reader,
		writer: writer,
	}, nil
}

// Writer returns the write end of the cat-file process pipes.
// It returns nil after Close has been called, because Close clears the field.
func (b *batch) Writer() WriteCloserError {
return b.writer
}

// Reader returns the buffered read end of the cat-file process pipes.
// It returns nil after Close has been called, because Close clears the field.
func (b *batch) Reader() *bufio.Reader {
return b.reader
}

// Close stops the underlying git cat-file process and releases held resources.
// It is a no-op on a nil receiver and on a batch that has already been closed,
// because the cancel function is cleared after its first use.
func (b *batch) Close() {
	if b != nil && b.cancel != nil {
		b.cancel()
		// Drop every reference so a repeated Close does nothing and the
		// accessors return nil from now on.
		b.reader, b.writer, b.cancel = nil, nil, nil
	}
}

// EnsureValidGitRepository runs `git rev-parse` in the repository path to make sure
// the directory is a valid git repository. This avoids git cat-file hanging indefinitely
// when invoked in invalid paths.
Expand Down
24 changes: 24 additions & 0 deletions modules/git/catfile/object_pool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package catfile

import (
"bufio"
)

// ObjectInfo holds the three values parsed from a cat-file header line for one object.
type ObjectInfo struct {
// ID is the object ID reported on the header line, as a string.
ID string
// Type is the object type reported on the header line (callers compare it
// against values such as "commit").
Type string
// Size is the object size reported on the header line; callers use it as
// the number of content bytes to read.
Size int64
}

// ObjectInfoPool looks up object metadata (ID, type, size) without exposing object content.
type ObjectInfoPool interface {
// ObjectInfo returns the metadata for the object that refName resolves to.
ObjectInfo(refName string) (*ObjectInfo, error)
// Close releases the resources held by the pool.
Close()
}

// ObjectPool looks up an object and gives access to its content.
type ObjectPool interface {
// Object returns the metadata for the object that refName resolves to,
// together with a reader from which the object's content can be read.
// NOTE(review): the reader appears to be shared across calls, so callers
// presumably must consume the content before the next request; confirm
// against the implementation.
Object(refName string) (*ObjectInfo, *bufio.Reader, error)
// Close releases the resources held by the pool.
Close()
}
101 changes: 101 additions & 0 deletions modules/git/catfile/object_pool_cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package catfile

import (
"bufio"
"context"
)

// batchCheck represents an active `git cat-file --batch-check` invocation
// paired with the pipes that feed/read from it. Call Close to release resources.
type batchCheck struct {
// cancel is invoked by Close to stop the underlying process.
cancel context.CancelFunc
// reader is the buffered read end from which header lines are parsed.
reader *bufio.Reader
// writer is the write end to which requests are written.
writer WriteCloserError
}

// NewObjectInfoPool creates a cat-file --batch-check process for the provided
// repository path and returns it as an ObjectInfoPool.
//
// The path is first validated with EnsureValidGitRepository; if validation
// fails, that error is returned and no process is created.
// The returned ObjectInfoPool must be closed once the caller has finished with it.
func NewObjectInfoPool(ctx context.Context, repoPath string) (ObjectInfoPool, error) {
	if err := EnsureValidGitRepository(ctx, repoPath); err != nil {
		return nil, err
	}

	var check batchCheck
	check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath)
	return &check, nil
}

// ObjectInfo writes refName followed by a newline to the cat-file process and
// parses the header line it answers with into an ObjectInfo.
// Any write or parse error is returned unchanged, with a nil result.
func (b *batchCheck) ObjectInfo(refName string) (*ObjectInfo, error) {
	if _, err := b.writer.Write([]byte(refName + "\n")); err != nil {
		return nil, err
	}

	oid, typ, size, err := ReadBatchLine(b.reader)
	if err != nil {
		return nil, err
	}

	return &ObjectInfo{
		ID:   string(oid),
		Type: typ,
		Size: size,
	}, nil
}

// Close stops the underlying git cat-file process and releases held resources.
// It calls the stored cancel function first and then closes the writer; the
// error from closing the writer is deliberately discarded.
func (b *batchCheck) Close() {
	if cancel := b.cancel; cancel != nil {
		cancel()
	}
	if w := b.writer; w != nil {
		_ = w.Close()
	}
}

// batch represents an active `git cat-file --batch` invocation
// paired with the pipes that feed/read from it. Call Close to release resources.
type batch struct {
// cancel is invoked by Close to stop the underlying process.
cancel context.CancelFunc
// reader is the buffered read end; Object parses the header from it and
// then hands it to the caller for reading the content.
reader *bufio.Reader
// writer is the write end to which requests are written.
writer WriteCloserError
}

// NewObjectPool creates a new cat-file --batch process for the provided
// repository path and returns it as an ObjectPool.
//
// The path is first validated with EnsureValidGitRepository; if validation
// fails, that error is returned and no process is created.
// The returned ObjectPool must be closed once the caller has finished with it.
func NewObjectPool(ctx context.Context, repoPath string) (ObjectPool, error) {
	if err := EnsureValidGitRepository(ctx, repoPath); err != nil {
		return nil, err
	}

	var batch batch
	batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath)
	return &batch, nil
}

// Object writes refName followed by a newline to the cat-file process, parses
// the header line it answers with, and returns the resulting ObjectInfo
// together with the batch's own reader.
// Any write or parse error is returned unchanged, with nil results.
func (b *batch) Object(refName string) (*ObjectInfo, *bufio.Reader, error) {
	if _, err := b.writer.Write([]byte(refName + "\n")); err != nil {
		return nil, nil, err
	}

	oid, typ, size, err := ReadBatchLine(b.reader)
	if err != nil {
		return nil, nil, err
	}

	info := &ObjectInfo{
		ID:   string(oid),
		Type: typ,
		Size: size,
	}
	// The same reader is returned on every call, so the content is read from
	// the stream the header was just parsed from.
	return info, b.reader, nil
}

// Close stops the underlying git cat-file process and releases held resources.
// It calls the stored cancel function first and then closes the writer; the
// error from closing the writer is deliberately discarded.
func (b *batch) Close() {
	if cancel := b.cancel; cancel != nil {
		cancel()
	}
	if w := b.writer; w != nil {
		_ = w.Close()
	}
}
36 changes: 17 additions & 19 deletions modules/git/languagestats/language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/attribute"
"code.gitea.io/gitea/modules/git/catfile"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"

Expand All @@ -22,34 +23,31 @@ import (
func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, error) {
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
// so let's create a batch stdin and stdout
batch, cancel, err := repo.CatFileBatch(repo.Ctx)
objectPool, cancel, err := repo.CatFileBatch(repo.Ctx)
if err != nil {
return nil, err
}
defer cancel()

writeID := func(id string) error {
_, err := batch.Writer().Write([]byte(id + "\n"))
return err
}

if err := writeID(commitID); err != nil {
object, batchReader, err := objectPool.Object(commitID)
if err != nil {
if catfile.IsErrObjectNotFound(err) {
return nil, git.ErrNotExist{ID: commitID}
}
return nil, err
}
batchReader := batch.Reader()
shaBytes, typ, size, err := git.ReadBatchLine(batchReader)
if typ != "commit" {
if object.Type != "commit" {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, git.ErrNotExist{ID: commitID}
}

sha, err := git.NewIDFromString(string(shaBytes))
sha, err := git.NewIDFromString(object.ID)
if err != nil {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, git.ErrNotExist{ID: commitID}
}

commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size))
commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, object.Size))
if err != nil {
log.Debug("Unable to get commit for: %s. Err: %v", commitID, err)
return nil, err
Expand Down Expand Up @@ -145,20 +143,20 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
// If content can not be read or file is too big just do detection by filename

if f.Size() <= bigFileSize {
if err := writeID(f.ID.String()); err != nil {
return nil, err
}
_, _, size, err := git.ReadBatchLine(batchReader)
object, batchReader, err := objectPool.Object(f.ID.String())
if err != nil {
if catfile.IsErrObjectNotFound(err) {
return nil, git.ErrNotExist{ID: f.ID.String()}
}
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
return nil, err
}

sizeToRead := size
sizeToRead := object.Size
discard := int64(1)
if size > fileSizeLimit {
if object.Size > fileSizeLimit {
sizeToRead = fileSizeLimit
discard = size - fileSizeLimit + 1
discard = object.Size - fileSizeLimit + 1
}

_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
Expand Down
Loading