Skip to content

Commit 0c6afe8

Browse files
committed
walk commits in history lazily
Fixes #725. Because `git.Log` from go-git traverses the history and computes the flattened list of commits before you can call `Next`, instead of traversing it lazily, gitbase took as long to get 1 commit as it did to get all of them. Instead, since we don't care about the commit order or any of the constraints go-git's log has, we can just walk all the commits lazily as they're requested. This query: `SELECT commit_author_email FROM commits LIMIT 1` took ~1m to execute. Now it takes ~5s with a cold cache. Basically, walk all references that are hash references and, from the HEAD commit of each reference, walk the history until there are no more commits to walk, skipping commits that have already been seen. Then move on to the next reference, and so on. Signed-off-by: Miguel Molina <[email protected]>
1 parent 0b4af78 commit 0c6afe8

File tree

7 files changed

+206
-151
lines changed

7 files changed

+206
-151
lines changed

commit_blobs.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"bytes"
55
"io"
66

7-
git "gopkg.in/src-d/go-git.v4"
87
"gopkg.in/src-d/go-git.v4/plumbing"
98
"gopkg.in/src-d/go-git.v4/plumbing/object"
109
"gopkg.in/src-d/go-mysql-server.v0/sql"
@@ -103,6 +102,7 @@ func (t *commitBlobsTable) PartitionRows(
103102
return &commitBlobsRowIter{
104103
repo: repo,
105104
commits: stringsToHashes(commits),
105+
iter: nil,
106106
index: indexValues,
107107
skipGitErrors: shouldSkipErrors(ctx),
108108
}, nil
@@ -216,16 +216,18 @@ func (i *commitBlobsRowIter) Next() (sql.Row, error) {
216216
}
217217

218218
func (i *commitBlobsRowIter) init() error {
219-
var err error
220219
if len(i.commits) > 0 {
221-
i.iter, err = NewCommitsByHashIter(i.repo, i.commits)
220+
i.iter = newCommitsByHashIter(i.repo, i.commits)
222221
} else {
223-
i.iter, err = i.repo.Log(&git.LogOptions{
224-
All: true,
225-
})
222+
iter, err := newCommitIter(i.repo, i.skipGitErrors)
223+
if err != nil {
224+
return err
225+
}
226+
227+
i.iter = iter
226228
}
227229

228-
return err
230+
return nil
229231
}
230232

231233
var commitBlobsCommitIdx = CommitBlobsSchema.IndexOf("commit_hash", CommitBlobsTableName)

commit_files.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,17 +193,18 @@ func (i *commitFilesRowIter) Next() (sql.Row, error) {
193193
}
194194

195195
func (i *commitFilesRowIter) init() error {
196-
var err error
197196
if len(i.commitHashes) > 0 {
198-
i.commits, err = NewCommitsByHashIter(i.repo, i.commitHashes)
197+
i.commits = newCommitsByHashIter(i.repo, i.commitHashes)
199198
} else {
200-
i.commits, err = i.repo.
201-
Log(&git.LogOptions{
202-
All: true,
203-
})
199+
iter, err := newCommitIter(i.repo, i.skipGitErrors)
200+
if err != nil {
201+
return err
202+
}
203+
204+
i.commits = iter
204205
}
205206

206-
return err
207+
return nil
207208
}
208209

209210
func (i *commitFilesRowIter) nextFromIndex() (sql.Row, error) {

commit_files_test.go

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func TestCommitFilesTableRowIter(t *testing.T) {
6565
}
6666
}
6767

68-
require.Equal(expected, rows)
68+
require.ElementsMatch(expected, rows)
6969
}
7070

7171
func TestCommitFilesIndex(t *testing.T) {
@@ -130,8 +130,6 @@ func TestCommitFilesIterClosed(t *testing.T) {
130130
}
131131

132132
func TestPartitionRowsWithIndex(t *testing.T) {
133-
134-
t.Helper()
135133
require := require.New(t)
136134
ctx, _, cleanup := setup(t)
137135
defer cleanup()
@@ -143,21 +141,8 @@ func TestPartitionRowsWithIndex(t *testing.T) {
143141
lookup := tableIndexLookup(t, table, ctx)
144142
tbl := table.WithIndexLookup(lookup)
145143

146-
pit, err := tbl.Partitions(ctx)
144+
result, err := tableToRows(ctx, tbl)
147145
require.NoError(err)
148146

149-
i := 0
150-
for p, e := pit.Next(); e != io.EOF; p, e = pit.Next() {
151-
require.NoError(e)
152-
153-
rit, err := tbl.PartitionRows(ctx, p)
154-
require.NoError(err)
155-
156-
for r, e := rit.Next(); e != io.EOF; r, e = rit.Next() {
157-
require.NoError(e)
158-
159-
require.ElementsMatch(expected[i], r)
160-
i++
161-
}
162-
}
147+
require.ElementsMatch(expected, result)
163148
}

commit_trees.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"bytes"
55
"io"
66

7-
git "gopkg.in/src-d/go-git.v4"
87
"gopkg.in/src-d/go-git.v4/plumbing"
98

109
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
@@ -212,16 +211,18 @@ func (i *commitTreesRowIter) Next() (sql.Row, error) {
212211
}
213212

214213
func (i *commitTreesRowIter) init() error {
215-
var err error
216214
if len(i.commitHashes) > 0 {
217-
i.commits, err = NewCommitsByHashIter(i.repo, i.commitHashes)
215+
i.commits = newCommitsByHashIter(i.repo, i.commitHashes)
218216
} else {
219-
i.commits, err = i.repo.Log(&git.LogOptions{
220-
All: true,
221-
})
217+
iter, err := newCommitIter(i.repo, i.skipGitErrors)
218+
if err != nil {
219+
return err
220+
}
221+
222+
i.commits = iter
222223
}
223224

224-
return err
225+
return nil
225226
}
226227

227228
var commitTreesHashIdx = CommitTreesSchema.IndexOf("commit_hash", CommitTreesTableName)

commit_trees_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func TestCommitTreesRowIter(t *testing.T) {
5353
{"b8e471f58bcbca63b07bda20e428190409c2db47", "c2d30fa8ef288618f65f6eed6e168e0d514886f4"},
5454
}
5555

56-
require.Equal(expected, rows)
56+
require.ElementsMatch(expected, rows)
5757
}
5858

5959
func TestCommitTreesPushdown(t *testing.T) {
@@ -109,7 +109,7 @@ func TestCommitTreesPushdown(t *testing.T) {
109109
rows[i] = row[1:]
110110
}
111111

112-
require.Equal(tt.expected, rows)
112+
require.ElementsMatch(tt.expected, rows)
113113
})
114114
}
115115
}

0 commit comments

Comments
 (0)