Skip to content

Commit 31f8376

Browse files
Updated notebooks
1 parent cfe0334 commit 31f8376

15 files changed

+69
-124
lines changed

Chapter01/06_neural_evolutionary_agent.ipynb

Lines changed: 6 additions & 6 deletions
Large diffs are not rendered by default.

Chapter02/3_temporal_difference_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/4_monte_carlo_prediction_and_control_rl.ipynb

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

Chapter02/5_sarsa_sarsa_lambda.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/6_q_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/7_policy_gradients.ipynb

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,13 @@
323323
"Episode#:0 ep_reward:-171.0\r",
324324
"Episode#:0 ep_reward:-172.0\r",
325325
"Episode#:0 ep_reward:-173.0\r",
326-
"Episode#:0 ep_reward:-174.0\r",
326+
"Episode#:0 ep_reward:-174.0\r"
327+
]
328+
},
329+
{
330+
"name": "stdout",
331+
"output_type": "stream",
332+
"text": [
327333
"Episode#:0 ep_reward:-175.0\r",
328334
"Episode#:0 ep_reward:-176.0\r",
329335
"Episode#:0 ep_reward:-177.0\r",
@@ -540,7 +546,13 @@
540546
"Episode#:1 ep_reward:-174.0\r",
541547
"Episode#:1 ep_reward:-175.0\r",
542548
"Episode#:1 ep_reward:-176.0\r",
543-
"Episode#:1 ep_reward:-177.0\r",
549+
"Episode#:1 ep_reward:-177.0\r"
550+
]
551+
},
552+
{
553+
"name": "stdout",
554+
"output_type": "stream",
555+
"text": [
544556
"Episode#:1 ep_reward:-178.0\r",
545557
"Episode#:1 ep_reward:-179.0\r",
546558
"Episode#:1 ep_reward:-180.0\r",

Chapter02/8_actor_critic_agent.ipynb

Lines changed: 13 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -161,116 +161,51 @@
161161
"Episode#:0 ep_reward:7.0\r",
162162
"Episode#:0 ep_reward:8.0\r",
163163
"Episode#:0 ep_reward:9.0\r",
164-
"Episode#:0 ep_reward:10.0\r",
164+
"Episode#:0 ep_reward:10.0\r\n",
165+
"\n",
165166
"Episode#:0 ep_reward:11.0\r",
166-
"Episode#:0 ep_reward:12.0\r",
167-
"Episode#:0 ep_reward:13.0\r",
168-
"Episode#:0 ep_reward:14.0\r",
169-
"Episode#:0 ep_reward:15.0\r",
170-
"Episode#:0 ep_reward:16.0\r"
171-
]
172-
},
173-
{
174-
"name": "stdout",
175-
"output_type": "stream",
176-
"text": [
177-
"Episode#:0 ep_reward:17.0\r",
178-
"Episode#:0 ep_reward:18.0\r",
179-
"Episode#:0 ep_reward:19.0\r",
180-
"Episode#:0 ep_reward:20.0\r",
181-
"Episode#:0 ep_reward:21.0\r",
182-
"Episode#:0 ep_reward:22.0\r",
183-
"Episode#:0 ep_reward:23.0\r",
184-
"Episode#:0 ep_reward:24.0\r"
167+
"Episode#:1 ep_reward:1.0\r"
185168
]
186169
},
187170
{
188171
"name": "stdout",
189172
"output_type": "stream",
190173
"text": [
191-
"\n",
192-
"\n",
193-
"Episode#:0 ep_reward:25.0\r",
194-
"Episode#:1 ep_reward:1.0\r",
195174
"Episode#:1 ep_reward:2.0\r",
196175
"Episode#:1 ep_reward:3.0\r",
197176
"Episode#:1 ep_reward:4.0\r",
198177
"Episode#:1 ep_reward:5.0\r",
199178
"Episode#:1 ep_reward:6.0\r",
200179
"Episode#:1 ep_reward:7.0\r",
201180
"Episode#:1 ep_reward:8.0\r",
202-
"Episode#:1 ep_reward:9.0\r",
203-
"Episode#:1 ep_reward:10.0\r",
204-
"Episode#:1 ep_reward:11.0\r",
205-
"Episode#:1 ep_reward:12.0\r",
206-
"Episode#:1 ep_reward:13.0\r",
207-
"Episode#:1 ep_reward:14.0\r",
208-
"Episode#:1 ep_reward:15.0\r"
181+
"Episode#:1 ep_reward:9.0\r"
209182
]
210183
},
211184
{
212185
"name": "stdout",
213186
"output_type": "stream",
214187
"text": [
188+
"Episode#:1 ep_reward:10.0\r",
189+
"Episode#:1 ep_reward:11.0\r",
190+
"Episode#:1 ep_reward:12.0\r",
191+
"Episode#:1 ep_reward:13.0\r",
192+
"Episode#:1 ep_reward:14.0\r",
193+
"Episode#:1 ep_reward:15.0\r",
215194
"Episode#:1 ep_reward:16.0\r",
216195
"Episode#:1 ep_reward:17.0\r",
217196
"Episode#:1 ep_reward:18.0\r",
218197
"Episode#:1 ep_reward:19.0\r",
219198
"Episode#:1 ep_reward:20.0\r",
220-
"Episode#:1 ep_reward:21.0\r",
221-
"Episode#:1 ep_reward:22.0\r",
222-
"Episode#:1 ep_reward:23.0\r"
223-
]
224-
},
225-
{
226-
"name": "stdout",
227-
"output_type": "stream",
228-
"text": [
229-
"Episode#:1 ep_reward:24.0\r",
230-
"Episode#:1 ep_reward:25.0\r",
231-
"Episode#:1 ep_reward:26.0\r",
232-
"Episode#:1 ep_reward:27.0\r",
233-
"Episode#:1 ep_reward:28.0\r",
234-
"Episode#:1 ep_reward:29.0\r",
235-
"Episode#:1 ep_reward:30.0\r",
236-
"Episode#:1 ep_reward:31.0\r",
237-
"Episode#:1 ep_reward:32.0\r",
238-
"Episode#:1 ep_reward:33.0\r",
239-
"Episode#:1 ep_reward:34.0\r",
240-
"Episode#:1 ep_reward:35.0\r",
241-
"Episode#:1 ep_reward:36.0\r",
242-
"Episode#:1 ep_reward:37.0\r",
243-
"Episode#:1 ep_reward:38.0\r",
244-
"Episode#:1 ep_reward:39.0\r"
245-
]
246-
},
247-
{
248-
"name": "stdout",
249-
"output_type": "stream",
250-
"text": [
251-
"Episode#:1 ep_reward:40.0\r",
252-
"Episode#:1 ep_reward:41.0\r",
253-
"Episode#:1 ep_reward:42.0\r",
254-
"Episode#:1 ep_reward:43.0\r",
255-
"Episode#:1 ep_reward:44.0\r",
256-
"Episode#:1 ep_reward:45.0\r",
257-
"Episode#:1 ep_reward:46.0\r",
258-
"Episode#:1 ep_reward:47.0\r"
199+
"Episode#:1 ep_reward:21.0\r"
259200
]
260201
},
261202
{
262203
"name": "stdout",
263204
"output_type": "stream",
264205
"text": [
265-
"Episode#:1 ep_reward:48.0\r",
266-
"Episode#:1 ep_reward:49.0\r",
267-
"Episode#:1 ep_reward:50.0\r",
268-
"Episode#:1 ep_reward:51.0\r",
269-
"Episode#:1 ep_reward:52.0\r",
270-
"Episode#:1 ep_reward:53.0\r",
271-
"Episode#:1 ep_reward:54.0\r\n",
272206
"\n",
273-
"Episode#:1 ep_reward:55.0\r"
207+
"\n",
208+
"Episode#:1 ep_reward:22.0\r"
274209
]
275210
}
276211
],

Chapter03/1_double_dqn.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
"name": "stdout",
7373
"output_type": "stream",
7474
"text": [
75-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210524-054846\n"
75+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210526-035407\n"
7676
]
7777
}
7878
],
@@ -195,7 +195,7 @@
195195
" action = self.model.get_action(observation)\n",
196196
" next_observation, reward, done, _ = self.env.step(action)\n",
197197
" self.buffer.store(\n",
198-
" observation, action, reward * 0.01, next_observation, done\n",
198+
" observation, action, reward, next_observation, done\n",
199199
" )\n",
200200
" episode_reward += reward\n",
201201
" observation = next_observation\n",
@@ -216,14 +216,14 @@
216216
"name": "stdout",
217217
"output_type": "stream",
218218
"text": [
219-
"Episode#0 Reward:21.0\n"
219+
"Episode#0 Reward:10.0\n"
220220
]
221221
},
222222
{
223223
"name": "stdout",
224224
"output_type": "stream",
225225
"text": [
226-
"Episode#1 Reward:40.0\n"
226+
"Episode#1 Reward:14.0\n"
227227
]
228228
}
229229
],

Chapter03/1_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210524-054851\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210526-035412\n"
8484
]
8585
}
8686
],
@@ -221,14 +221,14 @@
221221
"name": "stdout",
222222
"output_type": "stream",
223223
"text": [
224-
"Episode#0 Reward:13.0\n"
224+
"Episode#0 Reward:31.0\n"
225225
]
226226
},
227227
{
228228
"name": "stdout",
229229
"output_type": "stream",
230230
"text": [
231-
"Episode#1 Reward:30.0\n"
231+
"Episode#1 Reward:36.0\n"
232232
]
233233
}
234234
],

Chapter03/2_dueling_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20210524-054842\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DuelingDQN/CartPole-v0/20210526-035403\n"
8484
]
8585
}
8686
],
@@ -225,14 +225,14 @@
225225
"name": "stdout",
226226
"output_type": "stream",
227227
"text": [
228-
"Episode#0 Reward:22.0\n"
228+
"Episode#0 Reward:19.0\n"
229229
]
230230
},
231231
{
232232
"name": "stdout",
233233
"output_type": "stream",
234234
"text": [
235-
"Episode#1 Reward:14.0\n"
235+
"Episode#1 Reward:13.0\n"
236236
]
237237
}
238238
],

0 commit comments

Comments
 (0)