-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* update vendor deps * add 4sq package * more deps, not duckdb builds * add cmd/embeddings tool --------- Co-authored-by: sfomuseumbot <sfomuseumbot@localhost>
- Loading branch information
1 parent
09f14b2
commit cc0eeeb
Showing
1,113 changed files
with
40,546 additions
and
14,743 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
bin | ||
work | ||
*.db | ||
libduckdb.a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"flag" | ||
"io" | ||
"log" | ||
"log/slog" | ||
"os" | ||
|
||
"github.com/whosonfirst/go-dedupe/embeddings" | ||
) | ||
|
||
const stdin string = "-" | ||
|
||
func main() { | ||
|
||
var embedder_uri string | ||
var is_image bool | ||
|
||
flag.StringVar(&embedder_uri, "embedder-uri", "null://", "A registered whosonfirst/go-dedupe/embeddings.Embedder URI.") | ||
flag.BoolVar(&is_image, "image", false, "A boolean flag indicating whether to derive embeddings for an image.") | ||
|
||
flag.Parse() | ||
|
||
ctx := context.Background() | ||
|
||
emb, err := embeddings.NewEmbedder(ctx, embedder_uri) | ||
|
||
if err != nil { | ||
log.Fatalf("Failed to create embedder, %v", err) | ||
} | ||
|
||
for _, path := range flag.Args() { | ||
|
||
var data []float64 | ||
var input []byte | ||
var err error | ||
|
||
if path == stdin { | ||
|
||
input, err = io.ReadAll(os.Stdin) | ||
|
||
if err != nil { | ||
log.Fatalf("Failed to read data from STDIN, %v", err) | ||
} | ||
|
||
} else { | ||
|
||
input, err = os.ReadFile(path) | ||
|
||
if err != nil { | ||
log.Fatalf("Failed to read data from %s, %v", path, err) | ||
} | ||
|
||
} | ||
|
||
if is_image { | ||
data, err = emb.ImageEmbeddings(ctx, input) | ||
} else { | ||
data, err = emb.Embeddings(ctx, string(input)) | ||
} | ||
|
||
if err != nil { | ||
log.Fatalf("Failed to derive embeddings, %v", err) | ||
} | ||
|
||
enc := json.NewEncoder(os.Stdout) | ||
err = enc.Encode(data) | ||
|
||
if err != nil { | ||
log.Fatalf("Failed to encode embeddings, %v", err) | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,8 @@ import ( | |
_ "io" | ||
"net/http" | ||
"net/url" | ||
"strings" | ||
"strconv" | ||
"strings" | ||
"time" | ||
) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
package foursquare |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package foursquare | ||
|
||
// ./bin/index-locations -iterator-uri 'foursquare://?emitter-uri=csv:///usr/local/data/4sq/4sq.csv.bz2' -location-parser-uri 'foursquare://' -location-database-uri 'sql://sqlite3?dsn=/usr/local/data/4sq/4sq-locations.db' | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"fmt" | ||
"net/url" | ||
|
||
"github.com/whosonfirst/go-dedupe/iterator" | ||
"github.com/whosonfirst/go-foursquare-places/emitter" | ||
) | ||
|
||
type FoursquareIterator struct { | ||
iterator.Iterator | ||
emitter emitter.Emitter | ||
} | ||
|
||
func init() { | ||
ctx := context.Background() | ||
err := iterator.RegisterIterator(ctx, "foursquare", NewFoursquareIterator) | ||
if err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
func NewFoursquareIterator(ctx context.Context, uri string) (iterator.Iterator, error) { | ||
|
||
u, err := url.Parse(uri) | ||
|
||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
q := u.Query() | ||
|
||
emitter_uri := q.Get("emitter-uri") | ||
|
||
e, err := emitter.NewEmitter(ctx, emitter_uri) | ||
|
||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
iter := &FoursquareIterator{ | ||
emitter: e, | ||
} | ||
|
||
return iter, nil | ||
} | ||
|
||
func (iter *FoursquareIterator) IterateWithCallback(ctx context.Context, cb iterator.IteratorCallback, uris ...string) error { | ||
|
||
var iter_err error | ||
|
||
for pl, err := range iter.emitter.Emit(ctx) { | ||
|
||
if err != nil { | ||
iter_err = fmt.Errorf("Failed to iterate places, %w", err) | ||
break | ||
} | ||
|
||
body, err := json.Marshal(pl) | ||
|
||
if err != nil { | ||
iter_err = fmt.Errorf("Failed to marshal place %s, %w", pl, err) | ||
break | ||
} | ||
|
||
err = cb(ctx, body) | ||
|
||
if err != nil { | ||
iter_err = fmt.Errorf("Failed to execute callback for place %s, %w", pl, err) | ||
break | ||
} | ||
} | ||
|
||
if iter_err != nil { | ||
return iter_err | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (iter *FoursquareIterator) Close(ctx context.Context) error { | ||
return iter.emitter.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
package foursquare | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
_ "log/slog" | ||
"strings" | ||
|
||
"github.com/paulmach/orb" | ||
"github.com/tidwall/gjson" | ||
"github.com/whosonfirst/go-dedupe" | ||
"github.com/whosonfirst/go-dedupe/location" | ||
) | ||
|
||
type FoursquarePlaceParser struct { | ||
location.Parser | ||
addr_keys []string | ||
} | ||
|
||
func init() { | ||
ctx := context.Background() | ||
err := location.RegisterParser(ctx, "foursquare", NewFoursquarePlaceParser) | ||
|
||
if err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
func NewFoursquarePlaceParser(ctx context.Context, uri string) (location.Parser, error) { | ||
|
||
addr_keys := []string{ | ||
"address", | ||
"po_box", | ||
"post_town", | ||
"region", | ||
"admin_regin", | ||
"post_code", | ||
"country", | ||
} | ||
|
||
p := &FoursquarePlaceParser{ | ||
addr_keys: addr_keys, | ||
} | ||
|
||
return p, nil | ||
} | ||
|
||
func (p *FoursquarePlaceParser) Parse(ctx context.Context, body []byte) (*location.Location, error) { | ||
|
||
id_rsp := gjson.GetBytes(body, "fsq_place_id") | ||
id := id_rsp.String() | ||
|
||
name_rsp := gjson.GetBytes(body, "name") | ||
name := name_rsp.String() | ||
|
||
addr_components := make([]string, 0) | ||
|
||
for _, k := range p.addr_keys { | ||
|
||
rsp := gjson.GetBytes(body, k) | ||
|
||
if rsp.Exists() && rsp.String() != "" { | ||
addr_components = append(addr_components, rsp.String()) | ||
} | ||
} | ||
|
||
if len(addr_components) == 0 { | ||
return nil, dedupe.InvalidRecord(id, fmt.Errorf("Missing 'address' properties")) | ||
} | ||
|
||
// Something something something libpostal... | ||
|
||
addr := strings.Join(addr_components, " ") | ||
|
||
lat_rsp := gjson.GetBytes(body, "latitude") | ||
lon_rsp := gjson.GetBytes(body, "longitude") | ||
|
||
lat := lat_rsp.Float() | ||
lon := lon_rsp.Float() | ||
|
||
centroid := orb.Point([2]float64{lon, lat}) | ||
|
||
c := &location.Location{ | ||
ID: id, | ||
Name: name, | ||
Address: addr, | ||
Centroid: ¢roid, | ||
} | ||
|
||
return c, nil | ||
} |
Oops, something went wrong.