Skip to content

Commit

Permalink
Add foursquare package (#6)
Browse files Browse the repository at this point in the history
* update vendor deps

* add 4sq package

* more deps, not duckdb builds

* add cmd/embeddings tool

---------

Co-authored-by: sfomuseumbot <sfomuseumbot@localhost>
  • Loading branch information
thisisaaronland and sfomuseumbot authored Jan 29, 2025
1 parent 09f14b2 commit cc0eeeb
Show file tree
Hide file tree
Showing 1,113 changed files with 40,546 additions and 14,743 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
bin
work
*.db
libduckdb.a
1 change: 1 addition & 0 deletions cmd/compare-locations/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

_ "github.com/mattn/go-sqlite3"
_ "github.com/whosonfirst/go-dedupe/alltheplaces"
_ "github.com/whosonfirst/go-dedupe/foursquare"
_ "github.com/whosonfirst/go-dedupe/ilms"
_ "github.com/whosonfirst/go-dedupe/overture"
_ "github.com/whosonfirst/go-dedupe/whosonfirst"
Expand Down
77 changes: 77 additions & 0 deletions cmd/embeddings/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package main

import (
"context"
"encoding/json"
"flag"
"io"
"log"
"log/slog"
"os"

"github.com/whosonfirst/go-dedupe/embeddings"
)

// stdin is the positional-argument value that main compares each path against;
// when a path equals it, input is read from os.Stdin instead of from a file.
const stdin string = "-"

// main derives embeddings for each input named on the command line and writes
// them to STDOUT as JSON, one encoded value per input.
//
// Flags:
//
//	-embedder-uri  URI passed to embeddings.NewEmbedder (default "null://").
//	-image         when set, inputs are passed to ImageEmbeddings as raw bytes;
//	               otherwise they are passed to Embeddings as a string.
//
// Each positional argument is a file path, or "-" to read the input from STDIN.
// Any failure is reported with log.Fatalf, which terminates the program.
func main() {

	var embedder_uri string
	var is_image bool

	flag.StringVar(&embedder_uri, "embedder-uri", "null://", "A registered whosonfirst/go-dedupe/embeddings.Embedder URI.")
	flag.BoolVar(&is_image, "image", false, "A boolean flag indicating whether to derive embeddings for an image.")

	flag.Parse()

	ctx := context.Background()

	emb, err := embeddings.NewEmbedder(ctx, embedder_uri)

	if err != nil {
		log.Fatalf("Failed to create embedder, %v", err)
	}

	// One encoder is shared by every input; there is no need to rebuild it on
	// each pass through the loop.
	enc := json.NewEncoder(os.Stdout)

	for _, path := range flag.Args() {

		var data []float64
		var input []byte
		var err error

		if path == stdin {

			input, err = io.ReadAll(os.Stdin)

			if err != nil {
				log.Fatalf("Failed to read data from STDIN, %v", err)
			}

		} else {

			input, err = os.ReadFile(path)

			if err != nil {
				log.Fatalf("Failed to read data from %s, %v", path, err)
			}
		}

		// Debug-level diagnostic only; it also keeps the file's "log/slog"
		// import in use so the program compiles.
		slog.Debug("Derive embeddings", "path", path, "image", is_image, "bytes", len(input))

		if is_image {
			data, err = emb.ImageEmbeddings(ctx, input)
		} else {
			data, err = emb.Embeddings(ctx, string(input))
		}

		if err != nil {
			log.Fatalf("Failed to derive embeddings for %s, %v", path, err)
		}

		err = enc.Encode(data)

		if err != nil {
			log.Fatalf("Failed to encode embeddings for %s, %v", path, err)
		}
	}

}
1 change: 1 addition & 0 deletions cmd/index-locations/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

_ "github.com/mattn/go-sqlite3"
_ "github.com/whosonfirst/go-dedupe/alltheplaces"
_ "github.com/whosonfirst/go-dedupe/foursquare"
_ "github.com/whosonfirst/go-dedupe/ilms"
_ "github.com/whosonfirst/go-dedupe/overture"
_ "github.com/whosonfirst/go-dedupe/whosonfirst"
Expand Down
2 changes: 1 addition & 1 deletion compare/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func CompareLocationDatabases(ctx context.Context, opts *CompareLocationDatabase
logger := slog.Default()
logger = logger.With("geohash", geohash)

logger.Debug("Process geohash")
logger.Info("Process geohash")

source_suffix := fmt.Sprintf("*-%s-source.jsonl", geohash)
target_suffix := fmt.Sprintf("*-%s-target.jsonl", geohash)
Expand Down
5 changes: 3 additions & 2 deletions compare/geohash.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func CompareLocationsForGeohash(ctx context.Context, opts *CompareLocationsForGe

t1 := time.Now()

// logger.Info("Walk sources", "path", opts.SourceLocations)
logger.Info("Walk sources", "path", opts.SourceLocations)
err = walk_reader(ctx, source_r, source_walk_cb)

if err != nil {
Expand All @@ -138,7 +138,7 @@ func CompareLocationsForGeohash(ctx context.Context, opts *CompareLocationsForGe
geohash := opts.Geohash
threshold := opts.Threshold

logger.Debug("Compare location from target database", "location", loc.String())
logger.Info("Compare location from target database", "location", loc.String())

// t1 := time.Now()

Expand Down Expand Up @@ -235,6 +235,7 @@ func walk_reader(ctx context.Context, r io.Reader, cb func(ctx context.Context,
case r := <-record_ch:

err := cb(ctx, r.Path, r)
// slog.Info("Process", "path", r.Path, "error", err)

r.CompletedChannel <- true

Expand Down
2 changes: 1 addition & 1 deletion embeddings/llamafile.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import (
_ "io"
"net/http"
"net/url"
"strings"
"strconv"
"strings"
"time"
)

Expand Down
1 change: 1 addition & 0 deletions foursquare/foursqure.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package foursquare
88 changes: 88 additions & 0 deletions foursquare/iterator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package foursquare

// ./bin/index-locations -iterator-uri 'foursquare://?emitter-uri=csv:///usr/local/data/4sq/4sq.csv.bz2' -location-parser-uri 'foursquare://' -location-database-uri 'sql://sqlite3?dsn=/usr/local/data/4sq/4sq-locations.db'

import (
"context"
"encoding/json"
"fmt"
"net/url"

"github.com/whosonfirst/go-dedupe/iterator"
"github.com/whosonfirst/go-foursquare-places/emitter"
)

// FoursquareIterator is an iterator.Iterator whose records come from an
// emitter.Emitter (github.com/whosonfirst/go-foursquare-places/emitter).
type FoursquareIterator struct {
iterator.Iterator
// emitter is assigned by NewFoursquareIterator, ranged over by
// IterateWithCallback and closed by Close.
emitter emitter.Emitter
}

// init registers NewFoursquareIterator with the iterator package under the
// name "foursquare". It panics if the registration returns an error.
func init() {

	if err := iterator.RegisterIterator(context.Background(), "foursquare", NewFoursquareIterator); err != nil {
		panic(err)
	}
}

// NewFoursquareIterator returns a FoursquareIterator configured from uri.
//
// uri is parsed with url.Parse and its "emitter-uri" query parameter is passed
// to emitter.NewEmitter to create the emitter the iterator reads from, for
// example:
//
//	foursquare://?emitter-uri=csv:///usr/local/data/4sq/4sq.csv.bz2
//
// An error is returned, wrapped with context, if uri cannot be parsed or the
// emitter cannot be created.
func NewFoursquareIterator(ctx context.Context, uri string) (iterator.Iterator, error) {

	u, err := url.Parse(uri)

	if err != nil {
		return nil, fmt.Errorf("Failed to parse URI, %w", err)
	}

	emitter_uri := u.Query().Get("emitter-uri")

	e, err := emitter.NewEmitter(ctx, emitter_uri)

	if err != nil {
		// Include the emitter URI: a missing or mistyped ?emitter-uri= value
		// is otherwise hard to recognise in the underlying error.
		return nil, fmt.Errorf("Failed to create emitter for '%s', %w", emitter_uri, err)
	}

	iter := &FoursquareIterator{
		emitter: e,
	}

	return iter, nil
}

// IterateWithCallback ranges over the places yielded by the iterator's emitter,
// marshals each one to JSON and passes the encoded bytes to cb.
//
// Iteration stops at the first failure (an error yielded by the emitter, a JSON
// marshalling error or an error returned by cb) and that error is returned,
// wrapped with context. Nil is returned once every place has been handled.
// The uris arguments are accepted but not read by this implementation.
func (iter *FoursquareIterator) IterateWithCallback(ctx context.Context, cb iterator.IteratorCallback, uris ...string) error {

	for place, emit_err := range iter.emitter.Emit(ctx) {

		if emit_err != nil {
			return fmt.Errorf("Failed to iterate places, %w", emit_err)
		}

		enc_place, marshal_err := json.Marshal(place)

		if marshal_err != nil {
			return fmt.Errorf("Failed to marshal place %s, %w", place, marshal_err)
		}

		if cb_err := cb(ctx, enc_place); cb_err != nil {
			return fmt.Errorf("Failed to execute callback for place %s, %w", place, cb_err)
		}
	}

	return nil
}

// Close calls Close on the iterator's emitter and returns whatever it returns.
// The ctx argument is not used.
func (iter *FoursquareIterator) Close(ctx context.Context) error {
return iter.emitter.Close()
}
91 changes: 91 additions & 0 deletions foursquare/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package foursquare

import (
"context"
"fmt"
_ "log/slog"
"strings"

"github.com/paulmach/orb"
"github.com/tidwall/gjson"
"github.com/whosonfirst/go-dedupe"
"github.com/whosonfirst/go-dedupe/location"
)

// FoursquarePlaceParser is a location.Parser that builds location.Location
// values from JSON-encoded Foursquare place records.
type FoursquarePlaceParser struct {
location.Parser
// addr_keys lists the JSON keys, in order, whose non-empty values Parse
// joins to form a location's address.
addr_keys []string
}

// init registers NewFoursquarePlaceParser with the location package under the
// name "foursquare". It panics if the registration returns an error.
func init() {

	if err := location.RegisterParser(context.Background(), "foursquare", NewFoursquarePlaceParser); err != nil {
		panic(err)
	}
}

// NewFoursquarePlaceParser returns a FoursquarePlaceParser with its default,
// ordered list of address keys. Neither ctx nor uri is read, and the returned
// error is always nil.
func NewFoursquarePlaceParser(ctx context.Context, uri string) (location.Parser, error) {

	// Order matters: Parse joins the values in this order to build the address.
	//
	// NOTE(review): Foursquare's published places schema names the postal code
	// column "postcode" and also has a "locality" column; confirm "post_code"
	// against the JSON keys the emitter actually produces.
	addr_keys := []string{
		"address",
		"po_box",
		"post_town",
		"region",
		"admin_region", // was misspelled "admin_regin", so it could never match
		"post_code",
		"country",
	}

	p := &FoursquarePlaceParser{
		addr_keys: addr_keys,
	}

	return p, nil
}

// Parse builds a location.Location from body, a JSON-encoded Foursquare place.
//
// The ID is read from "fsq_place_id" and the name from "name". The address is
// the space-joined, non-empty values of the parser's address keys, in order.
// The centroid is the (longitude, latitude) point read from the "longitude"
// and "latitude" properties.
//
// An error built with dedupe.InvalidRecord is returned when no address
// component is present, or when either coordinate is missing or null (which
// would otherwise silently produce a centroid at 0, 0). The ctx argument is
// not used.
func (p *FoursquarePlaceParser) Parse(ctx context.Context, body []byte) (*location.Location, error) {

	id := gjson.GetBytes(body, "fsq_place_id").String()
	name := gjson.GetBytes(body, "name").String()

	addr_components := make([]string, 0, len(p.addr_keys))

	for _, k := range p.addr_keys {

		// String() is empty for missing and null values, so one check covers
		// absent keys as well as empty strings.
		v := gjson.GetBytes(body, k).String()

		if v != "" {
			addr_components = append(addr_components, v)
		}
	}

	if len(addr_components) == 0 {
		return nil, dedupe.InvalidRecord(id, fmt.Errorf("Missing 'address' properties"))
	}

	// Something something something libpostal...

	addr := strings.Join(addr_components, " ")

	lat_rsp := gjson.GetBytes(body, "latitude")
	lon_rsp := gjson.GetBytes(body, "longitude")

	// gjson reports both an absent key and an explicit JSON null as type Null;
	// Float() would turn either into 0.
	if lat_rsp.Type == gjson.Null || lon_rsp.Type == gjson.Null {
		return nil, dedupe.InvalidRecord(id, fmt.Errorf("Missing 'latitude' or 'longitude' properties"))
	}

	centroid := orb.Point([2]float64{lon_rsp.Float(), lat_rsp.Float()})

	c := &location.Location{
		ID:       id,
		Name:     name,
		Address:  addr,
		Centroid: &centroid,
	}

	return c, nil
}
Loading

0 comments on commit cc0eeeb

Please sign in to comment.