|
 ```
 (def data [[0 0 [0]] [0 1 [1]] [1 0 [1]] [1 1 [0]]])
 (def fit
-  (let [hidden-layers [3]
-        alpha 0.5
-        lambda 0.001]
-    (-> #(neural-network-fit % data)
-        (iterate (make-neural-network hidden-layers alpha lambda))
+  (let [alpha 0.5
+        lambda 0.001
+        model (-> (make-neural-network alpha lambda)
+                  (add-neural-network-layer 2 sigmoid)   ;; input layer
+                  (add-neural-network-layer 3 sigmoid)   ;; hidden layer
+                  (add-neural-network-layer 1 sigmoid))] ;; output layer
+    (-> (iterate #(neural-network-fit % data) model)
         (nth 5000))))
 (neural-network-predict fit (map butlast data))
 ;;=> [[0.04262340225834812] [0.9582632706756758] [0.9581124103456861] [0.04103544440312673]]
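
For reference, make-neural-network also takes an optional cost function (cross-entropy is the default; quadratic-cost is the other built-in). A hypothetical sketch of the same model built with the quadratic cost instead:

```
(def model
  (-> (make-neural-network 0.5 0.001 quadratic-cost)
      (add-neural-network-layer 2 sigmoid)
      (add-neural-network-layer 3 sigmoid)
      (add-neural-network-layer 1 sigmoid)))
```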
|
 (defn feed-forward
   "Returns the activation values for nodes in a neural network after forward
   propagating the values of a single input example x through the network."
-  [x theta]
-  (reduce (fn [activations weights]
+  [x theta fns]
+  (reduce (fn [activations [weights f]]
             (let [inputs (if (empty? activations) (m/matrix x) (last activations))
                   inputs+bias (m/join bias inputs)
-                  outputs (m/emap c/sigmoid (m/mmul weights inputs+bias))]
+                  outputs (m/emap f (m/mmul weights inputs+bias))]
               (conj activations outputs)))
           []
-          theta))
+          ;; fns holds one activation fn per layer, so skip the input layer's
+          ;; entry and pair each weight matrix with the fn of the layer it
+          ;; feeds into; this keeps the pairing consistent with the fns
+          ;; indexing used in back-propagate
+          (map vector theta (rest fns))))
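
To make the pairing concrete, here is a hand-run sketch of feed-forward (the weights are invented, each weight matrix carries its bias weights in the first column, and the file's bias constant is assumed to prepend a 1):

```
(let [theta [(m/matrix [[0.1 0.2 0.3]      ;; hidden node 1: bias, x1, x2
                        [0.4 0.5 0.6]])    ;; hidden node 2: bias, x1, x2
             (m/matrix [[0.7 0.8 0.9]])]   ;; output node: bias, h1, h2
      fns [identity c/sigmoid c/sigmoid]]  ;; one fn per layer; the input entry is never applied
  (feed-forward [0 1] theta fns))
;;=> two activation vectors: hidden-layer outputs, then output-layer outputs
```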
|
 (defn feed-forward-batch
   "Returns the activation values for nodes in a neural network after forward
   propagating a collection of input examples x through the network."
-  [x theta]
-  (-> (reduce (fn [inputs weights]
+  [x theta fns]
+  (-> (reduce (fn [inputs [weights f]]
                 (let [bias (m/broadcast 1.0 [1 (m/column-count inputs)])
                       inputs+bias (m/join bias inputs)
-                      outputs (m/emap c/sigmoid (m/mmul weights inputs+bias))]
+                      outputs (m/emap f (m/mmul weights inputs+bias))]
                   outputs))
               (m/transpose (m/matrix x))
-              theta)
+              ;; as in feed-forward, the input layer's fn is skipped
+              (map vector theta (rest fns)))
       (m/transpose)))
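
feed-forward-batch runs the same computation matrix-wise over a whole collection of examples; a sketch, with theta and fns as in the previous example:

```
(feed-forward-batch [[0 0] [0 1] [1 0] [1 1]] theta fns)
;;=> a matrix of output activations, one row per input example
```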
|
 (defn back-propagate
   "Returns the errors of each node in a neural network after propagating the
   errors at the output nodes, computed against a single target value y,
   backwards through the network."
-  [y theta activations output-error]
-  (->> (map vector (reverse (rest theta)) (reverse (butlast activations)))
-       (reduce (fn [errors [w a]]
-                 (cons (m/mul a (m/sub 1 a) (m/mmul (first errors) (drop-bias w)))
+  [y theta fns' activations output-error]
+  (->> (map vector
+            (reverse (rest theta))
+            (reverse (butlast activations))
+            ;; hidden-layer derivatives only: drop the input layer's entry and
+            ;; the output layer's, which output-error handles below
+            (reverse (rest (butlast fns'))))
+       (reduce (fn [errors [w a f]]
+                 (cons (m/mul (m/emap f a) (m/mmul (first errors) (drop-bias w)))
                       errors))
-               (list (output-error y (last activations))))
+               (list (output-error y (last activations) (last fns'))))
       (vec)))
|
 (defn compute-gradients
|
77 | 82 | "Returns the numeric approximations of the gradients for each weight given the |
78 | 83 | input values of a single example x and label y. Used for debugging by checking |
79 | 84 | against the computed gradients during backpropagation." |
80 | | - [x y theta cost] |
| 85 | + [x y theta fns cost] |
81 | 86 | (mapv (fn [k weights] |
82 | 87 | (m/matrix (for [i (range (m/row-count weights))] |
83 | 88 | (for [j (range (m/column-count weights))] |
84 | 89 | (let [w (m/select weights i j) |
85 | 90 | theta+ (assoc theta k (m/set-selection weights i j (+ w epsilon))) |
86 | 91 | theta- (assoc theta k (m/set-selection weights i j (- w epsilon)))] |
87 | | - (/ (- (cost (list x) (list y) theta+) |
88 | | - (cost (list x) (list y) theta-)) |
| 92 | + (/ (- (cost (list x) (list y) theta+ fns) |
| 93 | + (cost (list x) (list y) theta- fns)) |
89 | 94 | (* 2 epsilon))))))) |
90 | 95 | (range) |
91 | 96 | theta)) |
92 | 97 |
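
numeric-gradients approximates each partial derivative with the central difference (J(w + eps) - J(w - eps)) / 2 eps, perturbing one weight at a time. A sketch of invoking it directly, reusing the fitted model and a single XOR example from the README snippet:

```
(numeric-gradients [0 1] [1]
                   (:parameters fit)
                   (:activation-fns fit)
                   cross-entropy-cost)
;;=> one matrix of approximate gradients per weight matrix in theta
```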
|
 (defn gradient-descent-step
   "Performs a single gradient descent step on the input and target values of a
   single example x and label y, and returns the updated weights."
-  [x y theta alpha lambda cost output-error]
-  (let [activations (feed-forward x theta)
-        errors (back-propagate y theta activations output-error)
+  [x y theta fns alpha lambda cost output-error]
+  (let [activations (feed-forward x theta fns)
+        errors (back-propagate y theta (map c/derivative fns) activations output-error)
         gradients (compute-gradients x activations errors)
         regularization (map (fn [w]
                               (-> (m/mul alpha lambda w)
                                   (m/set-column 0 (m/matrix (repeat (m/row-count w) 0)))))
                             theta)]
     ;; Numeric gradient checking
-    ;;(println (map (comp #(/ (m/esum %) (m/ecount %)) m/abs m/sub) gradients (numeric-gradients x y theta cost)))
+    ;;(println (map (comp #(/ (m/esum %) (m/ecount %)) m/abs m/sub) gradients (numeric-gradients x y theta fns cost)))
     (mapv m/sub theta (map #(m/mul % alpha) gradients) regularization)))
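
Note the (map c/derivative fns) above: this assumes a c/derivative helper that returns each activation's derivative expressed in terms of the activation output a, since back-propagate applies it to stored activations rather than to raw pre-activation inputs. A hypothetical sketch of such a helper (not the library's actual implementation):

```
(defn derivative [f]
  (condp = f
    sigmoid (fn [a] (* a (- 1 a)))   ;; sigmoid'(z) = a(1 - a)
    tanh    (fn [a] (- 1 (* a a))))) ;; tanh'(z) = 1 - a^2
```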
|
 (defn gradient-descent
   "Performs gradient descent on input and target values of all examples x and
   y, and returns the updated weights."
   [model x y]
-  (let [{alpha :alpha lambda :lambda theta :parameters cost :cost output-error :output-error} model]
+  (let [{alpha :alpha lambda :lambda theta :parameters cost :cost
+         fns :activation-fns output-error :output-error} model]
     (loop [inputs x
            targets y
            weights theta]
|
        (gradient-descent-step (first inputs)
                               (first targets)
                               weights
+                              fns
                               alpha
                               lambda
                               cost
|
 ;; Cost functions
|
 (defn cross-entropy-cost
-  [x y theta]
-  (let [a (feed-forward-batch x theta)]
+  [x y theta fns]
+  (let [a (feed-forward-batch x theta fns)]
     (/ (m/esum (m/add (m/mul y (m/log a))
                       (m/mul (m/sub 1 y) (m/log (m/sub 1 a)))))
        (- (count x)))))
|
 (defn cross-entropy-output-error
-  [y activations]
+  [y activations f']
+  ;; cross-entropy error is independent of the derivative of the output activation
   (m/sub activations y))
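
The cancellation behind that comment: for the cross-entropy cost, dJ/da = (a - y) / (a(1 - a)), while a sigmoid output gives da/dz = a(1 - a); the two factors cancel, leaving the output error dJ/dz = a - y with no derivative term.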
|
 (defn quadratic-cost
-  [x y theta]
-  (/ (m/esum (m/square (m/sub (feed-forward-batch x theta) y)))
+  [x y theta fns]
+  (/ (m/esum (m/square (m/sub (feed-forward-batch x theta fns) y)))
      2))
|
 (defn quadratic-output-error
-  [y activations]
-  (m/mul (m/sub activations y) activations (m/sub 1 activations)))
+  [y activations f']
+  (m/mul (m/sub activations y) (m/emap f' activations)))
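
Unlike the cross-entropy case, the quadratic error keeps the derivative factor. A quick hand check, assuming a sigmoid output layer (so f' in terms of the activation is a(1 - a)):

```
(quadratic-output-error [1] [0.9] (fn [a] (* a (- 1 a))))
;;=> approximately [-0.009], since (0.9 - 1) * 0.9 * (1 - 0.9) = -0.009
```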
|
 ;; API
|
|
   ([model data]
    (neural-network-fit model (map (comp vec butlast) data) (map (comp vec last) data)))
   ([model x y]
-   (let [{hidden :hidden layers :layers theta :parameters} model
-         layers (or layers
-                    (concat [(count (first x))]   ;; number of input nodes
-                            hidden                ;; number of nodes at each hidden layer
-                            [(count (first y))])) ;; number of output nodes
+   (let [{layers :layers theta :parameters} model
          model (-> model
-                   (assoc :layers layers)
                    (assoc :parameters (or theta (init-parameters layers))))]
      (assoc model :parameters (gradient-descent model x y)))))
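
Because the learned weights ride along in the returned model map, and init-parameters only runs when :parameters is absent, calling fit again resumes from the current weights; a sketch using a model built as in the README snippet:

```
(-> model
    (neural-network-fit data)   ;; one pass of per-example gradient steps
    (neural-network-fit data))  ;; a second pass, starting from the learned weights
```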
|
 (defn neural-network-predict
   "Predicts the values of example data using a neural network model."
   [model x]
-  (let [{theta :parameters} model]
+  (let [{theta :parameters fns :activation-fns} model]
     (when (not (nil? theta))
-      (mapv vec (feed-forward-batch x theta)))))
+      (mapv vec (feed-forward-batch x theta fns)))))
|
 (defn neural-network-cost
   ([model data]
    (neural-network-cost model (map (comp vec butlast) data) (map (comp vec last) data)))
   ([model x y]
-   (let [{theta :parameters cost :cost} model]
+   (let [{theta :parameters fns :activation-fns cost :cost} model]
      (when (not (nil? theta))
-       (cost x y theta)))))
+       (cost x y theta fns)))))
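
A usage sketch, reusing fit and data from the README snippet; the result is the model's cost over all examples, which should shrink as training iterations increase:

```
(neural-network-cost fit data)
;;=> a small positive number for the trained XOR model
```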
|
 (defn print-neural-network
   "Prints information about a given neural network."
|
             (str (dec (count (first thetai))) " x " (count thetai))))))))
|
 (defn make-neural-network
-  "Returns a neural network model where alpha is the learning rate and hidden is
-   a sequence of numbers where the ith element is the number of nodes in the ith
-   hidden layer."
-  ([hidden alpha lambda]
-   (make-neural-network hidden alpha lambda cross-entropy-cost))
-  ([hidden alpha lambda cost]
+  "Returns a neural network model where alpha is the learning rate and lambda
+   is the regularization strength. Layers are added separately with
+   add-neural-network-layer."
+  ([alpha lambda]
+   (make-neural-network alpha lambda cross-entropy-cost))
+  ([alpha lambda cost]
    {:alpha alpha
     :lambda lambda
-    :hidden hidden
+    :layers []
+    :activation-fns []
     :cost cost
     :output-error (cond
                     (= cost cross-entropy-cost) cross-entropy-output-error
                     (= cost quadratic-cost) quadratic-output-error)}))
+
+(defn add-neural-network-layer
+  "Adds a layer with n nodes and activation function f to a neural network
+   model. Layers must be added in order: input first, then hidden layers,
+   then the output layer."
+  [model n f]
+  (-> model
+      (update :layers #(conj % n))
+      (update :activation-fns #(conj % f))))