From 1cf88dec65aeeb5218ae7aadf05d5d5a5882acc2 Mon Sep 17 00:00:00 2001 From: Jamie Stephens Date: Mon, 27 Oct 2014 10:59:35 -0500 Subject: [PATCH] Organize for library use. Move main()-related code to subdirectory. Name packages better. But leave bad globals sitting around. --- .gitignore | 4 +- README.md | 5 +-- config.js | 1 + example.go | 2 +- flags.go | 8 ++++ graph.go | 11 ++++- options.go | 2 +- quads.go | 2 +- quads_test.go | 2 +- repl.go | 40 ++++++++++++++++-- steps.go | 2 +- steps_test.go | 2 +- tinygraph/.gitignore | 2 + tinygraph/config.js | 1 + httpd.go => tinygraph/httpd.go | 15 ++----- main.go => tinygraph/main.go | 57 +++++--------------------- main_test.go => tinygraph/main_test.go | 1 + triples.go | 2 +- util.go | 2 +- 19 files changed, 87 insertions(+), 74 deletions(-) create mode 100644 config.js create mode 100644 flags.go create mode 100644 tinygraph/.gitignore create mode 100644 tinygraph/config.js rename httpd.go => tinygraph/httpd.go (87%) rename main.go => tinygraph/main.go (57%) rename main_test.go => tinygraph/main_test.go (97%) diff --git a/.gitignore b/.gitignore index 1212cd7..b352929 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *~ -tinygraph +*.db/ +wn31.nt.gz +TAGS diff --git a/README.md b/README.md index 3694595..08c4132 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,13 @@ Goal: A simple and relatively efficient graph data store that can handle billions of vertexes on single machine. In particular, we -wanted a local copy of [Freebase](https://www.freebase.com/) and -similar knowledge bases. +wanted a local copy of [Freebase](https://www.freebase.com/). This project is called "Tinygraph" because the codebase is tiny. It just doesn't do much, but it's pretty efficient and easy to use. -Status: Experimental. +Status: Experimental. Definitely not over-engineered. 
What it can do: diff --git a/config.js b/config.js new file mode 100644 index 0000000..0eb11a2 --- /dev/null +++ b/config.js @@ -0,0 +1 @@ +{"db_dir":"test.db"} diff --git a/example.go b/example.go index 58ee3a9..4be1329 100644 --- a/example.go +++ b/example.go @@ -1,4 +1,4 @@ -package main +package tinygraph import ( "fmt" diff --git a/flags.go b/flags.go new file mode 100644 index 0000000..9bf32e3 --- /dev/null +++ b/flags.go @@ -0,0 +1,8 @@ +package tinygraph + +import "flag" + +var onlyLang = flag.String("lang", "eng", "Only get these strings ('en' for Freebase; 'eng' for WordNet)") +var gzipin = flag.Bool("gzip", false, "Input triple files are gzipped") +var ignoreSilently = flag.Bool("silent-ignore", true, "Don't report when ingoring a triple") +var chanBufferSize = flag.Int("chanbuf", 16, "Traversal emission buffer") diff --git a/graph.go b/graph.go index ca9ac6b..302e7f9 100644 --- a/graph.go +++ b/graph.go @@ -1,10 +1,11 @@ -package main +package tinygraph // How to read and write triples. import ( "bytes" "fmt" + "log" "sync/atomic" rocks "github.csv.comcast.com/jsteph206/gorocksdb" @@ -26,6 +27,14 @@ func NewGraph(path string, opts *rocks.Options) (*Graph, error) { return &Graph{db, opts, nil, nil, uint64(0)}, nil } +func (g *Graph) Compact() { + log.Printf("starting initial compaction %s\n", NowStringMillis()) + ff := byte(0xff) + r := rocks.Range{[]byte{}, []byte{ff, ff, ff, ff, ff, ff, ff, ff, ff}} + g.db.CompactRange(r) + log.Printf("completed initial compaction %s\n", NowStringMillis()) +} + func (g *Graph) IncWrites(n uint64) uint64 { return atomic.AddUint64(&g.writes, n) } diff --git a/options.go b/options.go index d2f7384..03eda50 100644 --- a/options.go +++ b/options.go @@ -1,4 +1,4 @@ -package main +package tinygraph // Most of the options are delegated to RocksDB. 
diff --git a/quads.go b/quads.go index a1b992e..9a4f01f 100644 --- a/quads.go +++ b/quads.go @@ -1,4 +1,4 @@ -package main +package tinygraph // A fairly fragile RDF triple (quad) parser. diff --git a/quads_test.go b/quads_test.go index a4ff5a3..995df0d 100644 --- a/quads_test.go +++ b/quads_test.go @@ -1,4 +1,4 @@ -package main +package tinygraph import ( "fmt" diff --git a/repl.go b/repl.go index 7f6a2d3..c3dfa41 100644 --- a/repl.go +++ b/repl.go @@ -1,4 +1,4 @@ -package main +package tinygraph // Expose some Go functions to Javascript. @@ -34,6 +34,40 @@ func (e *Env) Open(config string) *Graph { return g } +func GetGraph(configFilename string) (*Graph, *Options) { + config, err := LoadOptions(configFilename) + if err != nil { + panic(err) + } + + opts := RocksOpts(config) + opts.SetCreateIfMissing(true) + opts.SetErrorIfExists(false) + + dirname := "tmp.db" + if dir, ok := config.StringKey("db_dir"); ok { + dirname = dir + } + + g, err := NewGraph(dirname, opts) + + if err != nil { + panic(err) + } + + g.wopts = RocksWriteOpts(config) + g.ropts = RocksReadOpts(config) + + return g, config +} + +var SharedGraph *Graph + +// Graph returns the global graph. Sorry. +func (e *Env) Graph() *Graph { + return SharedGraph +} + func (e *Env) Out(p []byte) *Stepper { return Out(p) } @@ -109,7 +143,7 @@ func (e *Env) Scan(g *Graph, s []byte, limit int64) [][]string { return acc } -func initEnv(vm *otto.Otto) { +func InitEnv(vm *otto.Otto) { vm.Set("G", new(Env)) vm.Set("toJS", func(call otto.FunctionCall) otto.Value { @@ -124,7 +158,7 @@ func initEnv(vm *otto.Otto) { func REPL() { scanner := bufio.NewScanner(os.Stdin) vm := otto.New() - initEnv(vm) + InitEnv(vm) // Complete statement/expression must be on one line. for scanner.Scan() { line := scanner.Text() diff --git a/steps.go b/steps.go index 3db9709..212a728 100644 --- a/steps.go +++ b/steps.go @@ -1,4 +1,4 @@ -package main +package tinygraph // This code provides a slightly higher-level interface to Graph.Do(). 
// In(), Out(), Do(), and Walk() are the top-level functions. See diff --git a/steps_test.go b/steps_test.go index b34d434..0f86476 100644 --- a/steps_test.go +++ b/steps_test.go @@ -1,4 +1,4 @@ -package main +package tinygraph import ( "fmt" diff --git a/tinygraph/.gitignore b/tinygraph/.gitignore new file mode 100644 index 0000000..8cee1ba --- /dev/null +++ b/tinygraph/.gitignore @@ -0,0 +1,2 @@ +tinygraph +test.db/ diff --git a/tinygraph/config.js b/tinygraph/config.js new file mode 100644 index 0000000..0eb11a2 --- /dev/null +++ b/tinygraph/config.js @@ -0,0 +1 @@ +{"db_dir":"test.db"} diff --git a/httpd.go b/tinygraph/httpd.go similarity index 87% rename from httpd.go rename to tinygraph/httpd.go index b232eda..b136e5d 100644 --- a/httpd.go +++ b/tinygraph/httpd.go @@ -10,27 +10,20 @@ import ( "net/http" "github.com/robertkrimen/otto" + . "github.csv.comcast.com/jsteph206/tinygraph" ) -// We have a sad global for the graph given by the configFile. -var httpdGraph *Graph - // We have a single Javascript interpreter, which we probably shouldn't. var httpVM *otto.Otto func runHttpd() { log.Printf("Opening config %s", *configFile) - httpdGraph, _ = GetGraph(*configFile) + SharedGraph, _ = GetGraph(*configFile) http.HandleFunc("/js", handleJavascript) log.Printf("Start HTTP server %s", *httpPort) log.Printf("Done with HTTP server (%v)", http.ListenAndServe(*httpPort, nil)) } -// Graph returns the global graph. Bad. 
-func (e *Env) Graph() *Graph { - return httpdGraph -} - func handleJavascript(w http.ResponseWriter, r *http.Request) { r.ParseForm() js := r.FormValue("js") @@ -52,12 +45,12 @@ func handleJavascript(w http.ResponseWriter, r *http.Request) { if *sharedHttpVM { if httpVM == nil { httpVM = otto.New() - initEnv(httpVM) + InitEnv(httpVM) } vm = httpVM } else { vm = otto.New() - initEnv(vm) + InitEnv(vm) } o, err := vm.Run(js) diff --git a/main.go b/tinygraph/main.go similarity index 57% rename from main.go rename to tinygraph/main.go index ac568c8..df4694c 100644 --- a/main.go +++ b/tinygraph/main.go @@ -3,44 +3,34 @@ package main import ( "flag" "fmt" - rocks "github.csv.comcast.com/jsteph206/gorocksdb" "io/ioutil" + "log" "os" "runtime" "strings" "sync" "time" + + . "github.csv.comcast.com/jsteph206/tinygraph" ) var filesToLoad = flag.String("load", "", "Files to load") var repl = flag.Bool("repl", false, "Run REPL") var serve = flag.Bool("serve", false, "Start HTTPD server") -var onlyLang = flag.String("lang", "eng", "Only get these strings ('en' for Freebase; 'eng' for WordNet)") var configFile = flag.String("config", "config.js", "Configuration file") var sharedHttpVM = flag.Bool("sharevm", true, "Use a shared Javascript VM for the HTTP service") -var chanBufferSize = flag.Int("chanbuf", 16, "Traversal emission buffer") var httpPort = flag.String("port", ":8080", "HTTP server port") -var gzipin = flag.Bool("gzip", false, "Input triple files are gzipped") -var ignoreSilently = flag.Bool("silent-ignore", true, "Don't report when ingoring a triple") func RationalizeMaxProcs() { if os.Getenv("GOMAXPROCS") == "" { n := runtime.NumCPU() - fmt.Printf("Setting GOMAXPROCS to %d\n", n) + log.Printf("Setting GOMAXPROCS to %d\n", n) runtime.GOMAXPROCS(n) } else { - fmt.Printf("GOMAXPROCS is %v\n", os.Getenv("GOMAXPROCS")) + log.Printf("GOMAXPROCS is %v\n", os.Getenv("GOMAXPROCS")) } } -func CompactEverything(g *Graph) { - fmt.Printf("starting initial compaction %s\n", 
NowStringMillis()) - ff := byte(0xff) - r := rocks.Range{[]byte{}, []byte{ff, ff, ff, ff, ff, ff, ff, ff, ff}} - g.db.CompactRange(r) - fmt.Printf("completed initial compaction %s\n", NowStringMillis()) -} - func WriteStatsLoop(g *Graph) { go func() { for { @@ -51,40 +41,13 @@ func WriteStatsLoop(g *Graph) { }() } -func GetGraph(configFilename string) (*Graph, *Options) { - config, err := LoadOptions(configFilename) - if err != nil { - panic(err) - } - - opts := RocksOpts(config) - opts.SetCreateIfMissing(true) - opts.SetErrorIfExists(false) - - dirname := "tmp.db" - if dir, ok := config.StringKey("db_dir"); ok { - dirname = dir - } - - g, err := NewGraph(dirname, opts) - - if err != nil { - panic(err) - } - - g.wopts = RocksWriteOpts(config) - g.ropts = RocksReadOpts(config) - - return g, config -} - func Load() { g, config := GetGraph(*configFile) - fmt.Println(g.GetStats()) + log.Println(g.GetStats()) if b, ok := config.BoolKey("initial_compaction"); ok && b { - CompactEverything(g) - fmt.Println(g.GetStats()) + g.Compact() + log.Println(g.GetStats()) } if b, ok := config.BoolKey("stats_loop"); ok && b { @@ -94,7 +57,7 @@ func Load() { wait := sync.WaitGroup{} for _, filename := range strings.Split(*filesToLoad, ",") { filename = strings.TrimSpace(filename) - fmt.Printf("loading triples: %s\n", filename) + log.Printf("loading triples: %s\n", filename) wait.Add(1) go g.LoadTriplesFile(filename, config, &wait) // Stagger the threads a little. @@ -102,7 +65,7 @@ func Load() { } wait.Wait() - fmt.Println(g.GetStats()) + log.Println(g.GetStats()) err := g.Close() if err != nil { diff --git a/main_test.go b/tinygraph/main_test.go similarity index 97% rename from main_test.go rename to tinygraph/main_test.go index 48b4f81..55c02bc 100644 --- a/main_test.go +++ b/tinygraph/main_test.go @@ -3,6 +3,7 @@ package main import ( "fmt" "testing" + . 
"github.csv.comcast.com/jsteph206/tinygraph" ) func TinyTest(g *Graph) { diff --git a/triples.go b/triples.go index b18cfb8..083ca38 100644 --- a/triples.go +++ b/triples.go @@ -1,4 +1,4 @@ -package main +package tinygraph import ( "fmt" diff --git a/util.go b/util.go index 654e908..f62900a 100644 --- a/util.go +++ b/util.go @@ -1,4 +1,4 @@ -package main +package tinygraph import ( "bufio"