Skip to content

Commit b40d096

Browse files
committed
Fold sorted vector into knn search and remove incomplete priority queue implementation
1 parent e89f172 commit b40d096

File tree

5 files changed

+57
-101
lines changed

5 files changed

+57
-101
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ Add the following dependency to your project:
3636

3737
* [Binary tree](https://cloudkj.github.io/lambda-ml/lambda-ml.data.binary-tree.html)
3838
* [K-d tree](https://cloudkj.github.io/lambda-ml/lambda-ml.data.kd-tree.html)
39-
* [Priority queue](https://cloudkj.github.io/lambda-ml/lambda-ml.data.priority-queue.html)
4039

4140
## Examples
4241

src/lambda_ml/data/priority_queue.clj

Lines changed: 0 additions & 52 deletions
This file was deleted.

src/lambda_ml/nearest_neighbors.clj

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,46 @@
1212
```"
1313
(:require [lambda-ml.core :refer :all]
1414
[lambda-ml.data.binary-tree :as bt]
15-
[lambda-ml.data.kd-tree :as kd]
16-
[lambda-ml.data.priority-queue :as pq]))
15+
[lambda-ml.data.kd-tree :as kd]))
16+
17+
(defn make-item
  "Packs value and priority into a two-element vector, priority first, so
  that the priority sits at index 0 and the value at index 1."
  [value priority]
  [priority value])
20+
21+
(defn item-priority
  "Returns the element at index 0 of item, which is where make-item stores
  the priority."
  [item]
  (-> item
      (nth 0)))
24+
25+
(defn item-value
  "Returns the element at index 1 of item, which is where make-item stores
  the value."
  [item]
  (-> item
      (nth 1)))
28+
29+
(defn insert
  "Returns a vector of items with value inserted at the position given by
  priority, keeping at most bound items once the vector is non-empty.

  v is expected to already be ordered by ascending item priority, since the
  insertion point is located with a binary search. An item whose priority
  equals that of existing items is placed after them. When v already holds
  bound or more items, the new item either replaces the last (highest
  priority) item or, if its priority is not strictly lower than that item's,
  is discarded and v is returned unchanged. An empty v always receives the
  item, regardless of bound."
  [v value priority bound]
  (let [size (count v)
        full (>= size bound)]
    (if (empty? v)
      ;; Nothing to compare against: the new item is the whole result
      [(make-item value priority)]
      (if (and full (>= priority (item-priority (peek v))))
        ;; At capacity and the new item does not beat the current worst one
        v
        ;; Otherwise locate the first slot whose item has a strictly greater
        ;; priority and splice the new item in before it
        (let [position (loop [low 0
                              high size]
                         (if (< low high)
                           (let [middle (quot (+ low high) 2)]
                             (if (< priority (item-priority (nth v middle)))
                               (recur low middle)
                               (recur (inc middle) high)))
                           low))
              entry (make-item value priority)
              ;; When at capacity, the last item is dropped to make room
              keep-until (if full (dec size) size)]
          (into (conj (subvec v 0 position) entry)
                (subvec v position keep-until)))))))
1755

1856
(defn make-nearest-neighbor-search
1957
"Given a distance function f and a coll of items, each of which have an
@@ -29,7 +67,7 @@
2967
t (kd/make-tree dims items g)]
3068
(fn knn
3169
([k query]
32-
(knn k query t 0 (pq/make-queue)))
70+
(knn k query t 0 (vector)))
3371
([k query tree depth cand]
3472
(if (nil? tree)
3573
cand
@@ -41,13 +79,13 @@
4179
[near far] (if (<= (nth query-point dim) (nth node-point dim)) [left right] [right left])
4280
cand (->>
4381
;; Try to add current node to candidates
44-
(pq/insert cand node (f query-point node-point) k)
82+
(insert cand node (f query-point node-point) k)
4583
;; Explore near branch
4684
(knn k query near (inc depth)))]
4785
;; Optionally, explore far branch
4886
(if (or (< (count cand) k)
4987
(< (f query-point node-point dim)
50-
(pq/item-priority (pq/get-tail cand))))
88+
(item-priority (peek cand))))
5189
(knn k query far (inc depth) cand)
5290
cand))))))))
5391

@@ -67,7 +105,7 @@
67105
;; values in last position in training data examples
68106
(->> (map #(conj (vec %) nil) x)
69107
(map #(lookup k %))
70-
(map #(map (comp last pq/item-value) %))
108+
(map #(map (comp last item-value) %))
71109
(map agg)))))
72110

73111
(defn make-nearest-neighbors-classifier

test/lambda_ml/data/priority_queue_test.clj

Lines changed: 0 additions & 28 deletions
This file was deleted.

test/lambda_ml/nearest_neighbors_test.clj

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,21 @@
22
(:require [clojure.test :refer :all]
33
[clojure.set :refer :all]
44
[lambda-ml.nearest-neighbors :refer :all]
5-
[lambda-ml.distance :as d]
6-
[lambda-ml.data.priority-queue :as pq]))
5+
[lambda-ml.distance :as d]))
76

87
(deftest test-nearest-neighbor-search
98
(let [search (make-nearest-neighbor-search d/euclidean [[2 3] [5 4] [9 6] [4 7] [8 1] [7 2]])]
10-
(is (= [7 2] (pq/item-value (second (search 2 [8 1])))))
11-
(is (= [5 4] (pq/item-value (second (search 2 [2 3])))))
12-
(is (= [8 1] (pq/item-value (second (search 2 [7 2])))))
13-
(is (= [5 4] (pq/item-value (second (search 2 [4 7])))))
9+
(is (= [7 2] (item-value (second (search 2 [8 1])))))
10+
(is (= [5 4] (item-value (second (search 2 [2 3])))))
11+
(is (= [8 1] (item-value (second (search 2 [7 2])))))
12+
(is (= [5 4] (item-value (second (search 2 [4 7])))))
1413
(is (= 3 (count (search 3 [2 3]))))
1514
(is (= 6 (count (search 6 [2 3]))))
1615
(is (= 6 (count (search 9 [2 3]))))))
1716

1817
(deftest test-nearest-neighbor-search2
1918
(let [search (make-nearest-neighbor-search d/euclidean [[1 11] [2 5] [4 8] [6 4] [5 0] [7 9] [8 2]])]
20-
(is (= [4 8] (pq/item-value (first (search 5 [3 9])))))))
19+
(is (= [4 8] (item-value (first (search 5 [3 9])))))))
2120

2221
(deftest test-nearest-neighbor-search3
2322
(let [points {[0.0 0.0] 1
@@ -79,11 +78,11 @@
7978
[37.759859 -122.437134] :SanFrancisco}
8079
search (make-nearest-neighbor-search d/euclidean (keys points))]
8180
(is (= :SanJose
82-
(-> (search 2 ((map-invert points) :SantaCruz)) second pq/item-value points)))
81+
(-> (search 2 ((map-invert points) :SantaCruz)) second item-value points)))
8382
(is (= :SanFrancisco
84-
(-> (search 2 ((map-invert points) :Berkeley)) second pq/item-value points)))
83+
(-> (search 2 ((map-invert points) :Berkeley)) second item-value points)))
8584
(is (= :MountainView
86-
(-> (search 2 ((map-invert points) :PaloAlto)) second pq/item-value points)))))
85+
(-> (search 2 ((map-invert points) :PaloAlto)) second item-value points)))))
8786

8887
(deftest test-nearest-neighbor-search-metadata
8988
(let [points [[:a 2 3]
@@ -93,10 +92,10 @@
9392
[:e 8 1]
9493
[:f 7 2]]
9594
search (make-nearest-neighbor-search d/euclidean rest points)]
96-
(is (= :f (first (pq/item-value (second (search 2 [:e 8 1]))))))
97-
(is (= :b (first (pq/item-value (second (search 2 [:a 2 3]))))))
98-
(is (= :e (first (pq/item-value (second (search 2 [:f 7 2]))))))
99-
(is (= :b (first (pq/item-value (second (search 2 [:d 4 7]))))))))
95+
(is (= :f (first (item-value (second (search 2 [:e 8 1]))))))
96+
(is (= :b (first (item-value (second (search 2 [:a 2 3]))))))
97+
(is (= :e (first (item-value (second (search 2 [:f 7 2]))))))
98+
(is (= :b (first (item-value (second (search 2 [:d 4 7]))))))))
10099

101100
(deftest test-nearest-neighbors-classifier
102101
(let [data [[25 40000 :no]

0 commit comments

Comments
 (0)