Skip to content

Commit

Permalink
walk commits in history lazily
Browse files Browse the repository at this point in the history
Fixes #725

Because `git.Log` from go-git traverses the history and computes the flattened list of commits before you can call `Next`, instead of traversing it lazily, gitbase took as long to get 1 commit as to get all of them.
Instead, since we don't care about the commit order or any of the constraints go-git's log has, we can just walk all the commits lazily as they're requested.

This query:

SELECT commit_author_email FROM commits LIMIT 1

Took ~1m to execute. Now it takes ~5s with a cold cache.

Basically, walk all references that are hash references and, from the HEAD commit of each reference, walk the history until there are no more commits to walk, skipping commits that have already been seen. Then move on to the next reference, and so on.

Signed-off-by: Miguel Molina <[email protected]>
  • Loading branch information
erizocosmico committed Mar 14, 2019
1 parent 0b4af78 commit 0c6afe8
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 151 deletions.
16 changes: 9 additions & 7 deletions commit_blobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"io"

git "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-mysql-server.v0/sql"
Expand Down Expand Up @@ -103,6 +102,7 @@ func (t *commitBlobsTable) PartitionRows(
return &commitBlobsRowIter{
repo: repo,
commits: stringsToHashes(commits),
iter: nil,
index: indexValues,
skipGitErrors: shouldSkipErrors(ctx),
}, nil
Expand Down Expand Up @@ -216,16 +216,18 @@ func (i *commitBlobsRowIter) Next() (sql.Row, error) {
}

func (i *commitBlobsRowIter) init() error {
var err error
if len(i.commits) > 0 {
i.iter, err = NewCommitsByHashIter(i.repo, i.commits)
i.iter = newCommitsByHashIter(i.repo, i.commits)
} else {
i.iter, err = i.repo.Log(&git.LogOptions{
All: true,
})
iter, err := newCommitIter(i.repo, i.skipGitErrors)
if err != nil {
return err
}

i.iter = iter
}

return err
return nil
}

var commitBlobsCommitIdx = CommitBlobsSchema.IndexOf("commit_hash", CommitBlobsTableName)
Expand Down
15 changes: 8 additions & 7 deletions commit_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,17 +193,18 @@ func (i *commitFilesRowIter) Next() (sql.Row, error) {
}

func (i *commitFilesRowIter) init() error {
var err error
if len(i.commitHashes) > 0 {
i.commits, err = NewCommitsByHashIter(i.repo, i.commitHashes)
i.commits = newCommitsByHashIter(i.repo, i.commitHashes)
} else {
i.commits, err = i.repo.
Log(&git.LogOptions{
All: true,
})
iter, err := newCommitIter(i.repo, i.skipGitErrors)
if err != nil {
return err
}

i.commits = iter
}

return err
return nil
}

func (i *commitFilesRowIter) nextFromIndex() (sql.Row, error) {
Expand Down
21 changes: 3 additions & 18 deletions commit_files_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestCommitFilesTableRowIter(t *testing.T) {
}
}

require.Equal(expected, rows)
require.ElementsMatch(expected, rows)
}

func TestCommitFilesIndex(t *testing.T) {
Expand Down Expand Up @@ -130,8 +130,6 @@ func TestCommitFilesIterClosed(t *testing.T) {
}

func TestPartitionRowsWithIndex(t *testing.T) {

t.Helper()
require := require.New(t)
ctx, _, cleanup := setup(t)
defer cleanup()
Expand All @@ -143,21 +141,8 @@ func TestPartitionRowsWithIndex(t *testing.T) {
lookup := tableIndexLookup(t, table, ctx)
tbl := table.WithIndexLookup(lookup)

pit, err := tbl.Partitions(ctx)
result, err := tableToRows(ctx, tbl)
require.NoError(err)

i := 0
for p, e := pit.Next(); e != io.EOF; p, e = pit.Next() {
require.NoError(e)

rit, err := tbl.PartitionRows(ctx, p)
require.NoError(err)

for r, e := rit.Next(); e != io.EOF; r, e = rit.Next() {
require.NoError(e)

require.ElementsMatch(expected[i], r)
i++
}
}
require.ElementsMatch(expected, result)
}
15 changes: 8 additions & 7 deletions commit_trees.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"io"

git "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"

"gopkg.in/src-d/go-git.v4/plumbing/filemode"
Expand Down Expand Up @@ -212,16 +211,18 @@ func (i *commitTreesRowIter) Next() (sql.Row, error) {
}

func (i *commitTreesRowIter) init() error {
var err error
if len(i.commitHashes) > 0 {
i.commits, err = NewCommitsByHashIter(i.repo, i.commitHashes)
i.commits = newCommitsByHashIter(i.repo, i.commitHashes)
} else {
i.commits, err = i.repo.Log(&git.LogOptions{
All: true,
})
iter, err := newCommitIter(i.repo, i.skipGitErrors)
if err != nil {
return err
}

i.commits = iter
}

return err
return nil
}

var commitTreesHashIdx = CommitTreesSchema.IndexOf("commit_hash", CommitTreesTableName)
Expand Down
4 changes: 2 additions & 2 deletions commit_trees_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func TestCommitTreesRowIter(t *testing.T) {
{"b8e471f58bcbca63b07bda20e428190409c2db47", "c2d30fa8ef288618f65f6eed6e168e0d514886f4"},
}

require.Equal(expected, rows)
require.ElementsMatch(expected, rows)
}

func TestCommitTreesPushdown(t *testing.T) {
Expand Down Expand Up @@ -109,7 +109,7 @@ func TestCommitTreesPushdown(t *testing.T) {
rows[i] = row[1:]
}

require.Equal(tt.expected, rows)
require.ElementsMatch(tt.expected, rows)
})
}
}
Expand Down
Loading

0 comments on commit 0c6afe8

Please sign in to comment.