#!/usr/bin/env python3
"""
Unit tests for the ReflectionAgent class.

I created these tests to ensure the reflection mechanism works correctly
and to help maintain code quality as the project grows.
"""
import os
import sys
from unittest.mock import Mock, patch

import numpy as np
import pytest

# Add the current directory to Python path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from reflection_agent import ReflectionAgent


class TestReflectionAgent:
    """Test suite for ReflectionAgent class."""

    @pytest.fixture
    def action_space(self):
        """Create a mock action space for testing."""
        mock_space = Mock()
        mock_space.n = 4  # 4 actions: up, down, left, right
        mock_space.sample.return_value = 2  # Return action 2 (left) as default
        return mock_space
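
    # A Mock stands in for the real action space so these unit tests don't
    # depend on gym/gymnasium; gym.spaces.Discrete(4) would presumably work
    # here too, if that is the environment library the project uses.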

    @pytest.fixture
    def agent(self, action_space):
        """Create a ReflectionAgent instance for testing."""
        return ReflectionAgent(action_space)

    @pytest.fixture
    def sample_state(self):
        """Create a sample state for testing."""
        return np.array([5, 5])

    @pytest.fixture
    def sample_goal(self):
        """Create a sample goal position."""
        return np.array([8, 8])

    def test_agent_initialization(self, action_space):
        """Test that the agent initializes correctly with all required attributes."""
        agent = ReflectionAgent(action_space)

        # Check basic attributes
        assert agent.action_space == action_space
        assert agent.epsilon == 0.9
        assert agent.epsilon_min == 0.3
        assert agent.alpha == 0.5
        assert agent.gamma == 0.9

        # Check memory systems
        assert isinstance(agent.q_table_short_term, dict)
        assert isinstance(agent.q_table_long_term, dict)
        assert agent.memory_balance == 0.5

        # Check reflection parameters
        assert agent.confidence_threshold == 0.25
        assert agent.adaptation_threshold == 0.35
        assert agent.reflection_frequency == 5

        # Check experience buffer
        assert isinstance(agent.experience_buffer, list)
        assert isinstance(agent.experience_priorities, list)
        assert agent.max_buffer_size == 1000

    def test_set_goal_position(self, agent, sample_goal):
        """Test setting the goal position."""
        agent.set_goal_position(sample_goal)
        assert np.array_equal(agent.goal_pos, sample_goal)

    def test_calculate_confidence(self, agent):
        """Test confidence calculation with various scenarios."""
        # Optimal path: full confidence
        confidence = agent.calculate_confidence(steps=10, shortest_path=10)
        assert confidence == 1.0

        # Suboptimal path: reduced confidence
        confidence = agent.calculate_confidence(steps=15, shortest_path=10)
        assert confidence == 0.5

        # Unreachable goal (infinite shortest path): zero confidence
        confidence = agent.calculate_confidence(steps=10, shortest_path=float('inf'))
        assert confidence == 0.0

        # Degenerate zero-length shortest path: zero confidence
        confidence = agent.calculate_confidence(steps=10, shortest_path=0)
        assert confidence == 0.0
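
    # The expected values above are consistent with a clamped linear penalty,
    # e.g. confidence = max(0, 1 - (steps - shortest_path) / shortest_path)
    # (10 steps vs. 10 optimal -> 1.0; 15 vs. 10 -> 0.5; inf and 0 special-cased
    # to 0.0). This is an inferred closed form, not the confirmed one; the
    # authoritative formula lives in reflection_agent.py.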

    def test_select_action_with_goal_direction(self, agent, sample_state, sample_goal):
        """Test action selection when goal position is set."""
        agent.set_goal_position(sample_goal)
        agent.epsilon = 0.0  # Disable random exploration

        # The agent should prefer actions toward the goal, but the UCB scoring
        # and short/long-term memory combination may legitimately pick any
        # valid action, so we only assert that the action is in range.
        action = agent.select_action(sample_state)
        assert action in [0, 1, 2, 3]

    def test_select_action_with_exploration(self, agent, sample_state, sample_goal):
        """Test action selection with exploration enabled."""
        agent.set_goal_position(sample_goal)  # Need goal position to avoid error
        agent.epsilon = 1.0  # Enable full exploration

        # Test multiple actions to ensure randomness
        actions = set()
        for _ in range(10):
            action = agent.select_action(sample_state)
            actions.add(action)
            assert action in [0, 1, 2, 3]

        # Should have some variety in actions due to exploration
        assert len(actions) > 1
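
    # Strictly speaking the variety assertion is probabilistic: assuming
    # exploration samples the 4 actions uniformly, all 10 draws coincide with
    # probability 4 * (1/4)**10 ~ 3.8e-6, so flakiness is negligible in practice.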

    def test_learn_basic_q_update(self, agent, sample_state):
        """Test basic Q-learning update functionality."""
        action = 1
        reward = 1.0
        next_state = np.array([6, 5])
        done = False

        # The state should not yet appear in either Q-table
        state_key = tuple(sample_state)
        assert state_key not in agent.q_table_short_term
        assert state_key not in agent.q_table_long_term

        # Perform learning
        agent.learn(sample_state, action, reward, next_state, done, 1, 10)

        # Check that Q-values were updated
        assert state_key in agent.q_table_short_term
        assert state_key in agent.q_table_long_term

        # Check that the action's Q-value increased from zero
        short_term_q = agent.q_table_short_term[state_key][action]
        long_term_q = agent.q_table_long_term[state_key][action]
        assert short_term_q > 0
        assert long_term_q > 0
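
    # Sanity check for the expected magnitude, assuming the standard tabular
    # update Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)):
    # with Q initialized to 0, r = 1.0, alpha = 0.5 and an unvisited s', the
    # new value would be 0.5 * (1.0 + 0.9 * 0 - 0) = 0.5 > 0. The agent's
    # dual-memory variant may scale this differently, hence the loose > 0
    # assertions above.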

    def test_learn_with_wall_collision(self, agent, sample_state):
        """Test learning when agent hits a wall."""
        action = 1
        reward = -1.0  # Wall collision penalty
        next_state = sample_state  # Same state (didn't move)
        done = False

        # Perform learning
        agent.learn(sample_state, action, reward, next_state, done, 1, 10)

        # Check that wall memory was updated
        state_key = tuple(sample_state)
        assert state_key in agent.wall_memory
        assert action in agent.wall_memory[state_key]

    def test_reflection_mechanism(self, agent, sample_goal):
        """Test the reflection mechanism functionality."""
        agent.set_goal_position(sample_goal)

        # Feed the agent exactly one reflection cycle's worth of transitions
        for i in range(agent.reflection_frequency):
            state = np.array([5 + i, 5])
            action = 1
            reward = 0.1
            next_state = np.array([6 + i, 5])
            done = False
            steps = i + 1
            agent.reflect(state, action, reward, next_state, done, steps)

        # After reflection_frequency calls, reflection_memory should be cleared
        assert len(agent.reflection_memory) == 0

    def test_environment_change_detection_in_learn(self, agent, sample_state):
        """Test environment change detection mechanism through the learn method."""
        action = 1
        reward = 0.5
        next_state = np.array([6, 5])

        # First call - should record the result
        agent.learn(sample_state, action, reward, next_state, False, 1, 10)
        initial_stability = agent.environment_stability

        # Second call with same result - should maintain stability
        agent.learn(sample_state, action, reward, next_state, False, 2, 10)
        assert agent.environment_stability == initial_stability

        # Call with different result - should decrease stability
        different_next_state = np.array([7, 5])
        agent.learn(sample_state, action, reward, different_next_state, False, 3, 10)
        assert agent.environment_stability < initial_stability

    def test_adapt_strategy(self, agent):
        """Test strategy adaptation based on performance."""
        initial_epsilon = agent.epsilon

        # Test adaptation with poor progress
        agent.adapt_strategy(progress=0, current_distance=10)
        # Should increase exploration rate (but may be capped)
        assert agent.epsilon >= agent.epsilon_min

        # Test adaptation with good progress
        agent.epsilon = initial_epsilon
        agent.adapt_strategy(progress=5, current_distance=2)
        # Should decrease exploration rate
        assert agent.epsilon <= initial_epsilon

    def test_wall_memory_refresh(self, agent, sample_state):
        """Test wall memory refresh mechanism."""
        # Seed wall memory with two aged entries
        state_key = tuple(sample_state)
        agent.wall_memory[state_key] = {1, 2}
        agent.wall_memory_age[state_key] = {1: 60, 2: 60}  # Old memories

        # Trigger refresh
        agent._refresh_wall_memory()

        # Aged memories should be pruned; at minimum the set must not grow
        assert len(agent.wall_memory.get(state_key, set())) <= 2
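
    # The age value 60 is assumed to exceed whatever maximum-age cutoff
    # _refresh_wall_memory prunes against; the exact threshold is defined in
    # reflection_agent.py, which is why the assertion above is deliberately loose.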

    def test_experience_replay(self, agent, sample_state):
        """Test experience replay functionality."""
        # Add some experiences
        for i in range(50):
            state = np.array([5 + i % 5, 5])
            action = i % 4
            reward = 0.1
            next_state = np.array([6 + i % 5, 5])
            done = False
            agent.learn(state, action, reward, next_state, done, i + 1, 10)

        # Check that the experience buffer has data
        assert len(agent.experience_buffer) > 0
        assert len(agent.experience_priorities) > 0

    def test_knowledge_transfer(self, agent, sample_state):
        """Test knowledge transfer from short-term to long-term memory."""
        # Seed short-term memory for a frequently visited state
        state_key = tuple(sample_state)
        agent.q_table_short_term[state_key] = np.array([0.1, 0.8, 0.2, 0.3])
        agent.visit_counts[state_key] = 10  # High visit count

        # Trigger knowledge transfer
        agent._transfer_knowledge()

        # Check that knowledge was transferred
        assert state_key in agent.q_table_long_term

    def test_adapt_to_environment_change(self, agent):
        """Test adaptation when an environment change is detected."""
        initial_reflection_freq = agent.reflection_frequency

        # Trigger environment change adaptation
        agent._adapt_to_environment_change()

        # Should increase the exploration rate (but it may be capped at a maximum)
        assert agent.epsilon >= agent.epsilon_min
        # Should reflect more frequently (a lower value means more often)
        assert agent.reflection_frequency <= initial_reflection_freq

    def test_learn_from_experience(self, agent):
        """Test learning from the experience replay buffer."""
        # Add some experiences to the buffer
        for i in range(40):
            state = tuple(np.array([5 + i % 3, 5]))
            action = i % 4
            reward = 0.1
            next_state = tuple(np.array([6 + i % 3, 5]))
            done = False
            agent.experience_buffer.append((state, action, reward, next_state, done))
            agent.experience_priorities.append(1.0)

        # Trigger experience replay learning
        agent._learn_from_experience()

        # Should have processed some experiences without draining the buffer
        assert len(agent.experience_buffer) > 0

    def test_edge_cases(self, agent, sample_goal):
        """Test edge cases and error handling."""
        # Without a goal position, action selection should fail loudly
        agent.goal_pos = None
        with pytest.raises(TypeError):
            agent.select_action(np.array([5, 5]))

        # With a goal position set, it should return a valid action
        agent.set_goal_position(sample_goal)
        action = agent.select_action(np.array([5, 5]))
        assert action in [0, 1, 2, 3]

        # An empty experience buffer should not crash replay
        agent.experience_buffer = []
        agent.experience_priorities = []
        agent._learn_from_experience()

        # An empty wall memory should not crash the refresh
        agent.wall_memory = {}
        agent.wall_memory_age = {}
        agent._refresh_wall_memory()

    def test_memory_balance_adjustment(self, agent, sample_state, sample_goal):
        """Test memory balance adjustment based on environment stability."""
        agent.set_goal_position(sample_goal)
        initial_balance = agent.memory_balance

        # Stable environment: the balance adjusts gradually, so call
        # select_action enough times for the change to become visible
        agent.environment_stability = 0.9
        for _ in range(20):
            agent.select_action(sample_state)
        stable_balance = agent.memory_balance

        # Reset, then repeat with a strongly unstable environment
        agent.memory_balance = 0.5
        agent.environment_stability = 0.1
        for _ in range(20):
            agent.select_action(sample_state)
        unstable_balance = agent.memory_balance

        # At least one of the runs should have moved the balance
        assert stable_balance != initial_balance or unstable_balance != 0.5

    def test_ucb_action_selection(self, agent, sample_state, sample_goal):
        """Test UCB-based action selection."""
        agent.set_goal_position(sample_goal)
        agent.epsilon = 0.0  # Disable exploration so UCB drives the choice

        # Seed Q-values and visit counts for the UCB computation
        state_key = tuple(sample_state)
        agent.q_table_short_term[state_key] = np.array([0.1, 0.2, 0.3, 0.4])
        agent.q_table_long_term[state_key] = np.array([0.1, 0.2, 0.3, 0.4])
        agent.visit_counts[state_key] = 5

        action = agent.select_action(sample_state)
        assert action in [0, 1, 2, 3]
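
    # For reference, classic UCB1 scores each action as
    #     Q(s, a) + c * sqrt(ln(N(s)) / n(s, a)),
    # trading exploitation against an exploration bonus that shrinks as an
    # action is tried more often. The agent's exact variant (and how it blends
    # the two Q-tables) is defined in reflection_agent.py, so this test only
    # checks that a valid action comes back.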

    def test_priority_experience_replay(self, agent):
        """Test priority-based experience replay."""
        # Add experiences with different priorities
        for i in range(20):
            state = tuple(np.array([5 + i % 3, 5]))
            action = i % 4
            reward = 0.1 if i % 2 == 0 else 1.0  # Different rewards
            next_state = tuple(np.array([6 + i % 3, 5]))
            done = False
            agent.experience_buffer.append((state, action, reward, next_state, done))
            agent.experience_priorities.append(1.0 + i)  # Different priorities

        # Trigger experience replay
        agent._learn_from_experience()

        # Should have processed experiences without draining the buffer
        assert len(agent.experience_buffer) > 0


if __name__ == "__main__":
    # Run tests with coverage
    pytest.main([__file__, "--cov=reflection_agent", "--cov-report=term-missing", "-v"])
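    # Note: the --cov options come from the pytest-cov plugin; if it isn't
    # installed (pip install pytest-cov), drop those two arguments or run
    # `pytest test_reflection_agent.py -v` instead.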