@@ -1,5 +1,5 @@
 ### A Pluto.jl notebook ###
-# v0.17.2
+# v0.19.5
 
 using Markdown
 using InteractiveUtils
@@ -32,9 +32,6 @@ We've discussed the `RandomWalk1D` environment before. In previous example, the
 # ╔═╡ 6a0881f0-5c6d-11eb-143e-0196833abc05
 ACTIONS = collect(Iterators.flatten((-100:-1, 1:100)))
 
-# ╔═╡ 7ee0867c-5c6d-11eb-11b4-a7858177564f
-NA = length(ACTIONS)
-
 # ╔═╡ 7aae4986-5c6d-11eb-09b0-fd883165bc72
 NS = 1002
 
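As a quick sanity check (not a cell from the notebook), the `ACTIONS` cell above evaluates to the 200 jump sizes -100..-1 and 1..100, which is exactly the length that the relocated `NA` cell later in this diff computes:

```julia
# Sanity check (stand-alone, not part of the notebook): ACTIONS holds the
# 200 jump sizes -100..-1 and 1..100, so NA == length(ACTIONS) == 200.
ACTIONS = collect(Iterators.flatten((-100:-1, 1:100)))
@assert length(ACTIONS) == 200
```
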
@@ -140,9 +137,9 @@ run(agent_1, env_1, StopAfterEpisode(10^5),hook)
 begin
 	fig_9_1 = plot(legend=:topleft, ylabel="Value scale", xlabel="State", right_margin = 1.5cm)
 	fig_9_1_right = twinx(fig_9_1)
-	plot!(fig_9_1, hook.counts./sum(hook.counts), color=:gray, label="state distribution")
-	plot!(fig_9_1_right, agent_1.policy.learner.approximator.(env_1.state_mapping(s) for s in 2:NS-1), label="MC Learner", legend=:bottomright)
-	plot!(fig_9_1_right, TRUE_STATE_VALUES[2:end-1], label="true values",legend=:bottomright, ylabel="Distribution scale")
+	plot!(fig_9_1_right, hook.counts./sum(hook.counts), color=:gray, label="state distribution")
+	plot!(fig_9_1, agent_1.policy.learner.approximator.(env_1.state_mapping(s) for s in 2:NS-1), label="MC Learner", legend=:bottomright)
+	plot!(fig_9_1, TRUE_STATE_VALUES[2:end-1], label="true values",legend=:bottomright, ylabel="Distribution scale")
 end
 
 # ╔═╡ 23060d86-5c70-11eb-2faa-a3851e3b5d2f
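For context on the hunk above: `twinx` in Plots.jl adds a secondary y-axis that shares the x-axis, and the fix routes the state-visit distribution to that secondary axis while the value curves stay on the primary one. Below is a minimal, self-contained sketch of the same pattern; it uses stand-in data rather than the notebook's `hook` and learner, so treat it as an illustration only.

```julia
# Minimal twinx sketch (stand-in data, not the notebook's variables).
using Plots

state_counts = rand(1000)                      # stand-in for hook.counts
value_estimates = collect(range(-1, 1; length=1000))  # stand-in for the learner's state values

p = plot(legend=:topleft, ylabel="Value scale", xlabel="State")
p_right = twinx(p)                             # secondary y-axis sharing the same x-axis

# Distribution on the secondary (right) axis, value curve on the primary (left) axis.
plot!(p_right, state_counts ./ sum(state_counts),
      color=:gray, label="state distribution", ylabel="Distribution scale")
plot!(p, value_estimates, label="estimated values", legend=:bottomright)
```
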
@@ -279,6 +276,9 @@ begin
 end
 
 
+# ╔═╡ 7ee0867c-5c6d-11eb-11b4-a7858177564f
+NA = length(ACTIONS)
+
 # ╔═╡ 87c528bc-5c75-11eb-2f2f-adf254afda01
 function run_once_MC(preprocessor, order, α)
 	env = StateTransformedEnv(