Skip to content

Commit

Permalink
*: replace pilosalib with new pilosa driver
Browse files Browse the repository at this point in the history
Signed-off-by: Miguel Molina <[email protected]>
  • Loading branch information
erizocosmico committed Sep 24, 2018
1 parent b0c383e commit 3b15073
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 64 deletions.
3 changes: 0 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ addons:
- g++-6

before_install:
- docker pull pilosa/pilosa:v1.1.0
- docker run -d --name pilosa -p 127.0.0.1:10101:10101 pilosa/pilosa:v1.1.0
- docker ps -a
- sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-6 90
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 90

Expand Down
39 changes: 6 additions & 33 deletions _testdata/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,50 +25,30 @@
Statements:
- SELECT repository_id,contributor_count FROM (SELECT repository_id, COUNT(DISTINCT commit_author_email) AS contributor_count FROM commits GROUP BY repository_id) AS q ORDER BY contributor_count DESC LIMIT 10
-
ID: 'query5'
ID: 'query6'
Name: 'Create pilosa index on language UDF'
Statements:
- CREATE INDEX language_idx ON files USING pilosa (language(file_path, blob_content)) WITH (async = false)
- DROP INDEX language_idx ON files
-
ID: 'query6'
Name: 'Create pilosalib index on language UDF'
Statements:
- CREATE INDEX language_idx ON files USING pilosalib (language(file_path, blob_content)) WITH (async = false)
- DROP INDEX language_idx ON files
-
ID: 'query7'
ID: 'query8'
Name: 'Query by language using the pilosa index'
Statements:
- CREATE INDEX language_idx ON files USING pilosa (language(file_path, blob_content)) WITH (async = false)
- SELECT file_path FROM files WHERE language(file_path, blob_content) = 'Go'
- DROP INDEX language_idx ON files
-
ID: 'query8'
Name: 'Query by language using the pilosalib index'
Statements:
- CREATE INDEX language_idx ON files USING pilosalib (language(file_path, blob_content)) WITH (async = false)
- SELECT file_path FROM files WHERE language(file_path, blob_content) = 'Go'
- DROP INDEX language_idx ON files
-
ID: 'query9'
Name: 'Query all files from HEAD'
Statements:
- SELECT cf.file_path, f.blob_content FROM ref_commits r NATURAL JOIN commit_files cf NATURAL JOIN files f WHERE r.ref_name = 'HEAD' AND r.history_index = 0
-
ID: 'query10'
ID: 'query11'
Name: 'Get all LICENSE blobs using pilosa index'
Statements:
- CREATE INDEX file_path_idx ON files USING pilosa (file_path) WITH (async = false)
- SELECT blob_content FROM files WHERE file_path = 'LICENSE'
- DROP INDEX file_path_idx ON files
-
ID: 'query11'
Name: 'Get all LICENSE blobs using pilosalib index'
Statements:
- CREATE INDEX file_path_idx ON files USING pilosalib (file_path) WITH (async = false)
- SELECT blob_content FROM files WHERE file_path = 'LICENSE'
- DROP INDEX file_path_idx ON files
-
ID: 'query12'
Name: '10 top repos by file count in HEAD'
Expand All @@ -85,28 +65,21 @@
Statements:
- SELECT * FROM (SELECT commit_author_email as author,count(*) as num_commits FROM commits GROUP BY commit_author_email) t ORDER BY num_commits DESC
-
ID: 'query15'
ID: 'query16'
Name: 'Union operation with pilosa index'
Statements:
- CREATE INDEX file_path_idx ON files USING pilosa (file_path) WITH (async = false)
- SELECT blob_content FROM files WHERE file_path = 'LICENSE' OR file_path = 'README.md'
- DROP INDEX file_path_idx ON files
-
ID: 'query16'
Name: 'Union operation with pilosalib index'
Statements:
- CREATE INDEX file_path_idx ON files USING pilosalib (file_path) WITH (async = false)
- SELECT blob_content FROM files WHERE file_path = 'LICENSE' OR file_path = 'README.md'
- DROP INDEX file_path_idx ON files
-
ID: 'query17'
Name: 'Count all commits with NOT operation'
Statements:
- SELECT COUNT(*) FROM commits WHERE NOT(commit_author_email = 'non existing email address');
-
ID: 'query18'
Name: 'Count all commits with NOT operation and pilosalib index'
Name: 'Count all commits with NOT operation and pilosa index'
Statements:
- CREATE INDEX email_idx ON commits USING pilosalib (commit_author_email) WITH (async = false)
- CREATE INDEX email_idx ON commits USING pilosa (commit_author_email) WITH (async = false)
- SELECT COUNT(*) FROM commits WHERE NOT(commit_author_email = 'non existing email address')
- DROP INDEX email_idx ON commits
14 changes: 3 additions & 11 deletions cmd/gitbase/command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/src-d/gitbase/internal/rule"

"github.com/opentracing/opentracing-go"
gopilosa "github.com/pilosa/go-pilosa"
"github.com/sirupsen/logrus"
"github.com/uber/jaeger-client-go/config"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
Expand All @@ -22,7 +21,6 @@ import (
"gopkg.in/src-d/go-mysql-server.v0/sql"
"gopkg.in/src-d/go-mysql-server.v0/sql/analyzer"
"gopkg.in/src-d/go-mysql-server.v0/sql/index/pilosa"
"gopkg.in/src-d/go-mysql-server.v0/sql/index/pilosalib"
"gopkg.in/src-d/go-vitess.v0/mysql"
)

Expand All @@ -48,7 +46,6 @@ type Server struct {
Port int `short:"p" long:"port" default:"3306" description:"Port where the server is going to listen"`
User string `short:"u" long:"user" default:"root" description:"User name used for connection"`
Password string `short:"P" long:"password" default:"" description:"Password used for connection"`
PilosaURL string `long:"pilosa" default:"http://localhost:10101" description:"URL to your pilosa server" env:"PILOSA_ENDPOINT"`
IndexDir string `short:"i" long:"index" default:"/var/lib/gitbase/index" description:"Directory where the gitbase indexes information will be persisted." env:"GITBASE_INDEX_DIR"`
CacheSize cache.FileSize `long:"cache" default:"512" description:"Object cache size in megabytes" env:"GITBASE_CACHESIZE_MB"`
Parallelism uint `long:"parallelism" description:"Maximum number of parallel threads per table. By default, it's the number of CPU cores. 0 means default, 1 means disabled."`
Expand Down Expand Up @@ -217,14 +214,9 @@ func (c *Server) registerDrivers() error {

logrus.Debug("created index storage")

if client, err := gopilosa.NewClient(c.PilosaURL); err == nil {
logrus.Debug("established connection with pilosa")
c.engine.Catalog.RegisterIndexDriver(pilosa.NewDriver(filepath.Join(c.IndexDir, pilosa.DriverID), client))
} else {
logrus.WithError(err).Warn("cannot connect to pilosa")
}

c.engine.Catalog.RegisterIndexDriver(pilosalib.NewDriver(filepath.Join(c.IndexDir, pilosalib.DriverID)))
c.engine.Catalog.RegisterIndexDriver(
pilosa.NewDriver(filepath.Join(c.IndexDir, pilosa.DriverID)),
)
logrus.Debug("registered pilosa index driver")

return nil
Expand Down
2 changes: 0 additions & 2 deletions docs/using-gitbase/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
| Name | Description |
|:-----------------------------|:-----------------------------------------------------------------------------------|
| `BBLFSH_ENDPOINT` | bblfshd endpoint, default "127.0.0.1:9432" |
| `PILOSA_ENDPOINT` | pilosa endpoint, default "http://localhost:10101" |
| `GITBASE_BLOBS_MAX_SIZE` | maximum blob size to return in MiB, default 5 MiB |
| `GITBASE_BLOBS_ALLOW_BINARY` | enable retrieval of binary blobs, default `false` |
| `GITBASE_SKIP_GIT_ERRORS` | do not stop queries on git errors, default disabled |
Expand Down Expand Up @@ -73,7 +72,6 @@ Help Options:
-p, --port= Port where the server is going to listen (default: 3306)
-u, --user= User name used for connection (default: root)
-P, --password= Password used for connection
--pilosa= URL to your pilosa server (default: http://localhost:10101) [$PILOSA_ENDPOINT]
-i, --index= Directory where the gitbase indexes information will be persisted. (default: /var/lib/gitbase/index) [$GITBASE_INDEX_DIR]
--no-squash Disables the table squashing.
--trace Enables jaeger tracing [$GITBASE_TRACE]
Expand Down
11 changes: 0 additions & 11 deletions docs/using-gitbase/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
`gitbase` has two optional dependencies that should be running on your system if you're planning on using certain functionality.

- [bblfsh](https://github.com/bblfsh/bblfshd) >= 2.6.1 (only if you're planning to use the `UAST` functionality provided in gitbase)
- [pilosa](https://github.com/pilosa/pilosa) 0.9.0 (only if you're planning on using indexes)

## Installing gitbase

Expand All @@ -18,16 +17,6 @@ You can use the official image from [docker hub](https://hub.docker.com/r/srcd/g
docker run --rm --name gitbase -p 3306:3306 -v /my/git/repos:/opt/repos srcd/gitbase:latest
```

If you want to speed up gitbase using indexes, you must run a pilosa container:
```
docker run -it --rm --name pilosa -p 10101:10101 pilosa/pilosa:v0.9.0
```

Then link the gitbase container to the pilosa one:
```
docker run --rm --name gitbase -p 3306:3306 --link pilosa:pilosa -e PILOSA_ENDPOINT="http://pilosa:10101" -v /my/git/repos:/opt/repos srcd/gitbase:latest
```

**Note:** remember to replace `/my/git/repos` with the local path where your repositories are stored on your computer.

### Download and use the binary
Expand Down
2 changes: 1 addition & 1 deletion docs/using-gitbase/indexes.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

`gitbase` allows you to speed up queries by creating indexes.

Indexes are implemented as bitmaps, using [pilosa](https://github.com/pilosa/pilosa) as their backend storage. To run a pilosa instance, see the [getting started](./getting-started.md) section.
Indexes are implemented as bitmaps, using [pilosa](https://github.com/pilosa/pilosa) as their backend storage.

Thus, to create indexes you must specify pilosa as the type of index. You can find some examples in the [examples](./examples.md#create-an-index-for-columns-on-a-table) section about managing indexes.

Expand Down
4 changes: 1 addition & 3 deletions regression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/stretchr/testify/require"
"gopkg.in/src-d/go-mysql-server.v0/sql"
"gopkg.in/src-d/go-mysql-server.v0/sql/index/pilosa"
"gopkg.in/src-d/go-mysql-server.v0/sql/index/pilosalib"
yaml "gopkg.in/yaml.v2"
)

Expand All @@ -29,8 +28,7 @@ func TestRegressionQueries(t *testing.T) {
tmpDir, err := ioutil.TempDir(os.TempDir(), "pilosa-idx-gitbase")
require.NoError(err)
defer os.RemoveAll(tmpDir)
engine.Catalog.RegisterIndexDriver(pilosa.NewIndexDriver(tmpDir))
engine.Catalog.RegisterIndexDriver(pilosalib.NewDriver(tmpDir))
engine.Catalog.RegisterIndexDriver(pilosa.NewDriver(tmpDir))

ctx := sql.NewContext(
context.TODO(),
Expand Down

0 comments on commit 3b15073

Please sign in to comment.