@@ -1,5 +1,5 @@
 ### A Pluto.jl notebook ###
-# v0.17.2
+# v0.19.5
 
 using Markdown
 using InteractiveUtils
@@ -32,9 +32,6 @@ We've discussed the `RandomWalk1D` environment before. In previous example, the |
 # ╔═╡ 6a0881f0-5c6d-11eb-143e-0196833abc05
 ACTIONS = collect(Iterators.flatten((-100:-1, 1:100)))
 
-# ╔═╡ 7ee0867c-5c6d-11eb-11b4-a7858177564f
-NA = length(ACTIONS)
-
 # ╔═╡ 7aae4986-5c6d-11eb-09b0-fd883165bc72
 NS = 1002
 
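For context, `ACTIONS` enumerates the 200 possible jumps of the random walk, up to 100 states to the left or right with 0 excluded, and `NS = 1002` presumably counts the 1000 non-terminal states plus the two terminal ones (matching the `2:NS-1` range evaluated in the plotting cell below). A quick standalone check of the construction, not part of the notebook:

    julia> ACTIONS = collect(Iterators.flatten((-100:-1, 1:100)));

    julia> length(ACTIONS)    # 100 negative + 100 positive jumps
    200

    julia> extrema(ACTIONS)   # zero is never an action
    (-100, 100)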
@@ -140,9 +137,9 @@ run(agent_1, env_1, StopAfterEpisode(10^5),hook) |
 begin
 	fig_9_1 = plot(legend=:topleft, ylabel="Value scale", xlabel="State", right_margin = 1.5cm)
 	fig_9_1_right = twinx(fig_9_1)
-	plot!(fig_9_1, hook.counts./sum(hook.counts), color=:gray, label="state distribution")
-	plot!(fig_9_1_right, agent_1.policy.learner.approximator.(env_1.state_mapping(s) for s in 2:NS-1), label="MC Learner", legend=:bottomright)
-	plot!(fig_9_1_right, TRUE_STATE_VALUES[2:end-1], label="true values",legend=:bottomright, ylabel="Distribution scale")
+	plot!(fig_9_1_right, hook.counts./sum(hook.counts), color=:gray, label="state distribution")
+	plot!(fig_9_1, agent_1.policy.learner.approximator.(env_1.state_mapping(s) for s in 2:NS-1), label="MC Learner", legend=:bottomright)
+	plot!(fig_9_1, TRUE_STATE_VALUES[2:end-1], label="true values",legend=:bottomright, ylabel="Distribution scale")
 end
 
 # ╔═╡ 23060d86-5c70-11eb-2faa-a3851e3b5d2f
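This hunk swaps which axis each series targets: the normalized visit counts move to the secondary axis created by `twinx`, while the learned and true state values go to the primary axis. A minimal, self-contained sketch of that Plots.jl dual-axis pattern, using placeholder data rather than the notebook's variables:

    using Plots

    vals = cumsum(randn(1000)) ./ 100   # placeholder for learned state values
    counts = rand(1000)
    dist = counts ./ sum(counts)        # placeholder for a visit-count distribution

    p = plot(legend=:topleft, ylabel="Value scale", xlabel="State")
    p_right = twinx(p)                  # second y-axis sharing the same x-axis
    plot!(p_right, dist, color=:gray, label="state distribution", ylabel="Distribution scale")
    plot!(p, vals, label="estimated values", legend=:bottomright)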
@@ -279,6 +276,9 @@ begin |
 end
 
 
+# ╔═╡ 7ee0867c-5c6d-11eb-11b4-a7858177564f
+NA = length(ACTIONS)
+
 # ╔═╡ 87c528bc-5c75-11eb-2f2f-adf254afda01
 function run_once_MC(preprocessor, order, α)
 	env = StateTransformedEnv(