@@ -51,18 +51,15 @@ def support (AsDist x: Dist m) : List (m & Float) =
51
51
concat $ for i. select (x.i > 0.0) (AsList 1 [(i, x.i)]) mempty
52
52
53
53
instance Arbitrary (Dist m)
54
- arb = \key. normalize $ arb key
54
+ arb = \key.
55
+ a = arb key
56
+ normalize $ for i. abs a.i
55
57
56
58
' We can define some combinators for taking expectations.
57
59
58
60
def expect [VSpace out] (AsDist x: Dist m) (y : m => out) : out =
59
61
sum for m'. x.m' .* y.m'
60
62
61
- ' And for independent distributions.
62
-
63
- def (,,) (x: Dist m) (y: Dist n) : Dist (m & n) =
64
- AsDist for (m',n'). (m' ?? x) * (n' ?? y)
65
-
66
63
' To represent conditional probabilities such as $ Pr(B \ |\ A)$ we define a type alias.
67
64
68
65
def Pr (b:Type) (a:Type): Type = a => Dist b
@@ -166,7 +163,6 @@ indicator variables to represent data observations.
166
163
167
164
' ## Differential Posterior Inference
168
165
169
-
170
166
' The network polynomial is a convenient method for computing probabilities,
171
167
but what makes it particularly useful is that it allows us to compute
172
168
posterior probabilities simply using derivatives.
@@ -193,6 +189,9 @@ yields posterior terms.
193
189
194
190
def posterior (f : (Var a) -> Float) : Dist a =
195
191
AsDist $ (grad (\ x. log $ f x)) one
192
+ def posteriorTab (f : m => (Var a) -> Float) : m => Dist a =
193
+ out = (grad (\ x. log $ f x)) one
194
+ for i. AsDist $ out.i
196
195
197
196
' And this yields exactly the term above! This is really neat though because it
198
197
doesn't require any application of model specific inference.
@@ -258,7 +257,7 @@ posterior (\m. two_dice latent m)
258
257
259
258
support $ posterior (\m. two_dice m (observed (roll_sum 4)))
260
259
261
- ' ## Conditional Independence
260
+ ' ## Discussion - Conditional Independence
262
261
263
262
' One tricky problem for discrete PPLs is modeling conditional independence.
264
263
Models can be very slow to compute if we are not careful to exploit
@@ -323,7 +322,7 @@ def yesno (x:Bool) : Dist YesNo = delta $ select x yes no
323
322
1. Finally we will see if we won.
324
323
325
324
326
- def monte_hall (change': Var YesNo) (win': Var YesNo) : Float =
325
+ def monty_hall (change': Var YesNo) (win': Var YesNo) : Float =
327
326
(one ~ uniform) (for (pick, correct): (Doors & Doors).
328
327
(change' ~ uniform) (for change.
329
328
(win' ~ (select (change == yes)
@@ -334,30 +333,53 @@ def monte_hall (change': Var YesNo) (win': Var YesNo) : Float =
334
333
' To check the odds we will compute the probability of winning conditioned
335
334
on changing.
336
335
337
- yes ?? (posterior $ monte_hall (observed yes))
336
+ yes ?? (posterior $ monty_hall (observed yes))
338
337
339
338
340
339
' And compare to the probability of winning with no change.
341
340
342
- yes ?? (posterior $ monte_hall (observed no))
341
+ yes ?? (posterior $ monty_hall (observed no))
343
342
344
343
' Finally a neat trick is that we can get both these terms by taking a second derivative. (TODO: show this in Dex)
345
344
346
345
347
346
' ## Example 5: Hidden Markov Models
348
347
348
+ ' Finally we conclude with a more complex example. A hidden Markov model is
349
+ one of the most widely used discrete time series models. It models the relationship between discrete hidden states $Z$ and emissions $X$.
350
+
351
+ Z = Fin 5
352
+ X = Fin 10
353
+
354
+ ' It consists of three distributions: initial, transition, and emission.
355
+
356
+ initial : Pr Z nil = arb $ newKey 1
357
+ emission : Pr X Z = arb $ newKey 2
358
+ transition : Pr Z Z = arb $ newKey 3
359
+
360
+ ' The model itself takes the following form for $m$ steps.
361
+ '
362
+ $$ z_0 \sim initial$$
363
+ $$ z_1 \sim transition(z_0)$$
364
+ $$ x_1 \sim emission(z_1)$$
365
+ $$ ...$$
366
+
367
+ ' This is implemented in reverse order for clarity (backward algorithm).
368
+
369
+ def hmm (init': Var Z) (x': m => Var X) (z' : m => Var Z) : Float =
370
+ (init' ~ initial.nil) $ yieldState one ( \future .
371
+ for i:m.
372
+ j = ((size m) - (ordinal i) - 1)@_
373
+ future := for z.
374
+ (x'.j ~ emission.z) (for _.
375
+ (z'.j ~ transition.z) (get future)))
376
+
377
+
378
+ ' We can marginalize out over latents.
349
379
380
+ hmm (observed (1@_)) (for i:(Fin 2). observed (1@_)) (for i. latent)
350
381
351
- def hmm (hidden_vars : m => Var Z) (init_var: Var Z) (x_vars: m => Var X)
352
- (transition : CDist Z Z) (emission: CDist X Z)
353
- : Float =
354
382
355
- -- Sample an initial state
356
- initial = sample init_var uniform []
357
- sum $ yieldState ( \zref .
358
- for i.
359
- -- Sample next state
360
- z' = markov $ sample hidden_vars.i transition (get zref)
383
+ ' Or we can compute the posterior probabilities of specific values.
361
384
362
- -- Factor in evidence
363
- zref := sample x_vars.i emission z'')
385
+ posteriorTab $ \z . hmm (observed (1@_)) (for i:(Fin 2). observed (1@_)) z
0 commit comments