Skip to content

Commit

Permalink
Organize for library use.
Browse files Browse the repository at this point in the history
Move main()-related code to subdirectory.

Name packages better.

But leave bad globals sitting around.
  • Loading branch information
jsccast committed Oct 27, 2014
1 parent 05a086e commit 1cf88de
Show file tree
Hide file tree
Showing 19 changed files with 87 additions and 74 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
*~
tinygraph
*.db/
wn31.nt.gz
TAGS
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

Goal: A simple and relatively efficient graph data store that can
handle billions of vertexes on single machine. In particular, we
wanted a local copy of [Freebase](https://www.freebase.com/) and
similar knowledge bases.
wanted a local copy of [Freebase](https://www.freebase.com/).

This project is called "<em>Tiny</em>graph" because the codebase is
tiny. It just doesn't do much, but it's pretty efficient and easy to
use.

Status: Experimental.
Status: Experimental. Definitely not over-engineered.

What it can do:

Expand Down
1 change: 1 addition & 0 deletions config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"db_dir":"test.db"}
2 changes: 1 addition & 1 deletion example.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

import (
"fmt"
Expand Down
8 changes: 8 additions & 0 deletions flags.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package tinygraph

import "flag"

// Command-line flags consulted by the library code. They are registered on
// the default flag set when the package is initialized, so any program that
// imports this package and calls flag.Parse exposes them.
var (
	// onlyLang selects which language-tagged strings are kept.
	onlyLang = flag.String("lang", "eng", "Only get these strings ('en' for Freebase; 'eng' for WordNet)")

	// gzipin says whether the input triple files are gzip-compressed.
	gzipin = flag.Bool("gzip", false, "Input triple files are gzipped")

	// ignoreSilently suppresses reports about ignored triples.
	ignoreSilently = flag.Bool("silent-ignore", true, "Don't report when ignoring a triple")

	// chanBufferSize is the buffer size used for traversal emission.
	chanBufferSize = flag.Int("chanbuf", 16, "Traversal emission buffer")
)
11 changes: 10 additions & 1 deletion graph.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package main
package tinygraph

// How to read and write triples.

import (
"bytes"
"fmt"
"log"
"sync/atomic"

rocks "github.csv.comcast.com/jsteph206/gorocksdb"
Expand All @@ -26,6 +27,14 @@ func NewGraph(path string, opts *rocks.Options) (*Graph, error) {
return &Graph{db, opts, nil, nil, uint64(0)}, nil
}

// Compact asks the underlying database to compact the key range that runs
// from the empty key up to a key of nine 0xff bytes, logging a timestamp
// before and after the request.
//
// NOTE(review): the nine-byte 0xff upper bound presumably stands in for
// "every key"; confirm that no stored key sorts above it.
func (g *Graph) Compact() {
	log.Printf("starting initial compaction %s\n", NowStringMillis())

	// Build the upper bound: nine bytes, each 0xff.
	upper := make([]byte, 9)
	for i := range upper {
		upper[i] = 0xff
	}

	g.db.CompactRange(rocks.Range{[]byte{}, upper})

	log.Printf("completed initial compaction %s\n", NowStringMillis())
}

// IncWrites atomically adds n to the graph's write counter and returns the
// counter's new value.
func (g *Graph) IncWrites(n uint64) uint64 {
return atomic.AddUint64(&g.writes, n)
}
Expand Down
2 changes: 1 addition & 1 deletion options.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

// Most of the options are delegated to RocksDB.

Expand Down
2 changes: 1 addition & 1 deletion quads.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

// A fairly fragile RDF triple (quad) parser.

Expand Down
2 changes: 1 addition & 1 deletion quads_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

import (
"fmt"
Expand Down
40 changes: 37 additions & 3 deletions repl.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

// Expose some Go functions to Javascript.

Expand Down Expand Up @@ -34,6 +34,40 @@ func (e *Env) Open(config string) *Graph {
return g
}

// GetGraph opens the graph described by the configuration file at
// configFilename and returns it together with the loaded options.
//
// The database directory comes from the "db_dir" configuration key and
// falls back to "tmp.db" when that key is absent. The RocksDB options are
// adjusted with SetCreateIfMissing(true) and SetErrorIfExists(false) before
// the graph is opened, and the graph's write and read options are derived
// from the same configuration.
//
// GetGraph panics if the configuration cannot be loaded or the graph cannot
// be opened.
func GetGraph(configFilename string) (*Graph, *Options) {
	config, loadErr := LoadOptions(configFilename)
	if loadErr != nil {
		panic(loadErr)
	}

	dbOpts := RocksOpts(config)
	dbOpts.SetCreateIfMissing(true)
	dbOpts.SetErrorIfExists(false)

	// Use the configured directory when present; otherwise the default.
	dbDir := "tmp.db"
	if configured, found := config.StringKey("db_dir"); found {
		dbDir = configured
	}

	graph, openErr := NewGraph(dbDir, dbOpts)
	if openErr != nil {
		panic(openErr)
	}

	graph.wopts = RocksWriteOpts(config)
	graph.ropts = RocksReadOpts(config)

	return graph, config
}

var SharedGraph *Graph

// Graph returns the global graph. Sorry.
//
// The value returned is whatever the package-level SharedGraph variable
// currently holds; the receiver is not consulted.
func (e *Env) Graph() *Graph {
return SharedGraph
}

// Out forwards p to the package-level Out function and returns the Stepper
// it produces; the receiver is not consulted.
func (e *Env) Out(p []byte) *Stepper {
return Out(p)
}
Expand Down Expand Up @@ -109,7 +143,7 @@ func (e *Env) Scan(g *Graph, s []byte, limit int64) [][]string {
return acc
}

func initEnv(vm *otto.Otto) {
func InitEnv(vm *otto.Otto) {
vm.Set("G", new(Env))

vm.Set("toJS", func(call otto.FunctionCall) otto.Value {
Expand All @@ -124,7 +158,7 @@ func initEnv(vm *otto.Otto) {
func REPL() {
scanner := bufio.NewScanner(os.Stdin)
vm := otto.New()
initEnv(vm)
InitEnv(vm)
// Complete statement/expression must be on one line.
for scanner.Scan() {
line := scanner.Text()
Expand Down
2 changes: 1 addition & 1 deletion steps.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

// This code provides a slightly higher-level interface to Graph.Do().
// In(), Out(), Do(), and Walk() are the top-level functions. See
Expand Down
2 changes: 1 addition & 1 deletion steps_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

import (
"fmt"
Expand Down
2 changes: 2 additions & 0 deletions tinygraph/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
tinygraph
test.db/
1 change: 1 addition & 0 deletions tinygraph/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"db_dir":"test.db"}
15 changes: 4 additions & 11 deletions httpd.go → tinygraph/httpd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,20 @@ import (
"net/http"

"github.com/robertkrimen/otto"
. "github.csv.comcast.com/jsteph206/tinygraph"
)

// We have a sad global for the graph given by the configFile.
var httpdGraph *Graph

// We have a single Javascript interpreter, which we probably shouldn't.
var httpVM *otto.Otto

func runHttpd() {
log.Printf("Opening config %s", *configFile)
httpdGraph, _ = GetGraph(*configFile)
SharedGraph, _ = GetGraph(*configFile)
http.HandleFunc("/js", handleJavascript)
log.Printf("Start HTTP server %s", *httpPort)
log.Printf("Done with HTTP server (%v)", http.ListenAndServe(*httpPort, nil))
}

// Graph returns the global graph. Bad.
func (e *Env) Graph() *Graph {
return httpdGraph
}

func handleJavascript(w http.ResponseWriter, r *http.Request) {
r.ParseForm()
js := r.FormValue("js")
Expand All @@ -52,12 +45,12 @@ func handleJavascript(w http.ResponseWriter, r *http.Request) {
if *sharedHttpVM {
if httpVM == nil {
httpVM = otto.New()
initEnv(httpVM)
InitEnv(httpVM)
}
vm = httpVM
} else {
vm = otto.New()
initEnv(vm)
InitEnv(vm)
}

o, err := vm.Run(js)
Expand Down
57 changes: 10 additions & 47 deletions main.go → tinygraph/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,34 @@ package main
import (
"flag"
"fmt"
rocks "github.csv.comcast.com/jsteph206/gorocksdb"
"io/ioutil"
"log"
"os"
"runtime"
"strings"
"sync"
"time"

. "github.csv.comcast.com/jsteph206/tinygraph"
)

var filesToLoad = flag.String("load", "", "Files to load")
var repl = flag.Bool("repl", false, "Run REPL")
var serve = flag.Bool("serve", false, "Start HTTPD server")
var onlyLang = flag.String("lang", "eng", "Only get these strings ('en' for Freebase; 'eng' for WordNet)")
var configFile = flag.String("config", "config.js", "Configuration file")
var sharedHttpVM = flag.Bool("sharevm", true, "Use a shared Javascript VM for the HTTP service")
var chanBufferSize = flag.Int("chanbuf", 16, "Traversal emission buffer")
var httpPort = flag.String("port", ":8080", "HTTP server port")
var gzipin = flag.Bool("gzip", false, "Input triple files are gzipped")
var ignoreSilently = flag.Bool("silent-ignore", true, "Don't report when ingoring a triple")

func RationalizeMaxProcs() {
if os.Getenv("GOMAXPROCS") == "" {
n := runtime.NumCPU()
fmt.Printf("Setting GOMAXPROCS to %d\n", n)
log.Printf("Setting GOMAXPROCS to %d\n", n)
runtime.GOMAXPROCS(n)
} else {
fmt.Printf("GOMAXPROCS is %v\n", os.Getenv("GOMAXPROCS"))
log.Printf("GOMAXPROCS is %v\n", os.Getenv("GOMAXPROCS"))
}
}

func CompactEverything(g *Graph) {
fmt.Printf("starting initial compaction %s\n", NowStringMillis())
ff := byte(0xff)
r := rocks.Range{[]byte{}, []byte{ff, ff, ff, ff, ff, ff, ff, ff, ff}}
g.db.CompactRange(r)
fmt.Printf("completed initial compaction %s\n", NowStringMillis())
}

func WriteStatsLoop(g *Graph) {
go func() {
for {
Expand All @@ -51,40 +41,13 @@ func WriteStatsLoop(g *Graph) {
}()
}

func GetGraph(configFilename string) (*Graph, *Options) {
config, err := LoadOptions(configFilename)
if err != nil {
panic(err)
}

opts := RocksOpts(config)
opts.SetCreateIfMissing(true)
opts.SetErrorIfExists(false)

dirname := "tmp.db"
if dir, ok := config.StringKey("db_dir"); ok {
dirname = dir
}

g, err := NewGraph(dirname, opts)

if err != nil {
panic(err)
}

g.wopts = RocksWriteOpts(config)
g.ropts = RocksReadOpts(config)

return g, config
}

func Load() {
g, config := GetGraph(*configFile)
fmt.Println(g.GetStats())
log.Println(g.GetStats())

if b, ok := config.BoolKey("initial_compaction"); ok && b {
CompactEverything(g)
fmt.Println(g.GetStats())
g.Compact()
log.Println(g.GetStats())
}

if b, ok := config.BoolKey("stats_loop"); ok && b {
Expand All @@ -94,15 +57,15 @@ func Load() {
wait := sync.WaitGroup{}
for _, filename := range strings.Split(*filesToLoad, ",") {
filename = strings.TrimSpace(filename)
fmt.Printf("loading triples: %s\n", filename)
log.Printf("loading triples: %s\n", filename)
wait.Add(1)
go g.LoadTriplesFile(filename, config, &wait)
// Stagger the threads a little.
time.Sleep(1 * time.Second)
}
wait.Wait()

fmt.Println(g.GetStats())
log.Println(g.GetStats())

err := g.Close()
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions main_test.go → tinygraph/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"fmt"
"testing"
. "github.csv.comcast.com/jsteph206/tinygraph"
)

func TinyTest(g *Graph) {
Expand Down
2 changes: 1 addition & 1 deletion triples.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

import (
"fmt"
Expand Down
2 changes: 1 addition & 1 deletion util.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package tinygraph

import (
"bufio"
Expand Down

0 comments on commit 1cf88de

Please sign in to comment.