1
1
import numpy as np
2
2
3
+
3
4
class CartPoleConfigModule ():
4
5
# parameters
5
6
ENV_NAME = "CartPole-v0"
@@ -12,7 +13,7 @@ class CartPoleConfigModule():
12
13
DT = 0.02
13
14
# cost parameters
14
15
R = np .diag ([0.01 ]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
15
- # 1. is worked for iLQR
16
+ # 1. is worked for iLQR
16
17
TERMINAL_WEIGHT = 1.
17
18
Q = None
18
19
Sf = None
@@ -39,41 +40,41 @@ def __init__(self):
39
40
"num_elites" : 50 ,
40
41
"max_iters" : 15 ,
41
42
"alpha" : 0.3 ,
42
- "init_var" :9. ,
43
- "threshold" :0.001
43
+ "init_var" : 9. ,
44
+ "threshold" : 0.001
44
45
},
45
- "MPPI" :{
46
- "beta" : 0.6 ,
46
+ "MPPI" : {
47
+ "beta" : 0.6 ,
47
48
"popsize" : 5000 ,
48
49
"kappa" : 0.9 ,
49
50
"noise_sigma" : 0.5 ,
50
51
},
51
- "MPPIWilliams" :{
52
+ "MPPIWilliams" : {
52
53
"popsize" : 5000 ,
53
54
"lambda" : 1. ,
54
55
"noise_sigma" : 0.9 ,
55
56
},
56
- "iLQR" :{
57
+ "iLQR" : {
57
58
"max_iter" : 500 ,
58
59
"init_mu" : 1. ,
59
60
"mu_min" : 1e-6 ,
60
61
"mu_max" : 1e10 ,
61
62
"init_delta" : 2. ,
62
63
"threshold" : 1e-6 ,
63
- },
64
- "DDP" :{
64
+ },
65
+ "DDP" : {
65
66
"max_iter" : 500 ,
66
67
"init_mu" : 1. ,
67
68
"mu_min" : 1e-6 ,
68
69
"mu_max" : 1e10 ,
69
70
"init_delta" : 2. ,
70
71
"threshold" : 1e-6 ,
71
- },
72
- "NMPC-CGMRES" :{
73
- },
74
- "NMPC-Newton" :{
75
- },
76
- }
72
+ },
73
+ "NMPC-CGMRES" : {
74
+ },
75
+ "NMPC-Newton" : {
76
+ },
77
+ }
77
78
78
79
@staticmethod
79
80
def input_cost_fn(u):
    """ input cost function

    Args:
        u (numpy.ndarray): input. The surviving docstring fragment mentions
            shape(pop_size, pred_len, input_size); any array whose last
            axis broadcasts against the diagonal of R works.

    Returns:
        cost (numpy.ndarray): element-wise squared input weighted by the
            diagonal of CartPoleConfigModule.R
    """
    # np.diag applied to the square matrix R extracts its diagonal as a
    # 1-D vector, which then broadcasts along the last axis of u.
    input_weights = np.diag(CartPoleConfigModule.R)
    return (u ** 2) * input_weights
90
-
91
+
91
92
@staticmethod
92
93
def state_cost_fn(x, g_x):
    """ state cost function

    Args:
        x (numpy.ndarray): state; the last axis must hold at least four
            components. 1-D, 2-D and 3-D inputs give the same values as
            the three hand-written branches this replaces.
        g_x (numpy.ndarray): goal state. Unused: the target is encoded
            directly in the cost terms below.

    Returns:
        cost (numpy.ndarray): for input with more than one axis, the
            leading axes of x followed by a trailing axis of length 1;
            for a 1-D state, a scalar
    """
    # NOTE(review): components 0..3 are presumably cart position, cart
    # velocity, pole angle and pole angular velocity -- confirm against
    # the environment model.
    # The (cos(x[2]) + 1)^2 term is zero when x[2] is an odd multiple of
    # pi; the quadratic terms are zero at 0.
    cost = (6. * (x[..., 0] ** 2)
            + 12. * ((np.cos(x[..., 2]) + 1.) ** 2)
            + 0.1 * (x[..., 1] ** 2)
            + 0.1 * (x[..., 3] ** 2))

    if x.ndim > 1:
        # keep a trailing singleton axis for batched / sequence input
        return cost[..., np.newaxis]

    return cost
121
122
122
123
@staticmethod
123
124
def terminal_state_cost_fn(terminal_x, terminal_g_x):
    """ terminal state cost function

    Args:
        terminal_x (numpy.ndarray): terminal state; the last axis must
            hold at least four components. 1-D and 2-D inputs give the
            same values as the two hand-written branches this replaces.
        terminal_g_x (numpy.ndarray): terminal goal state. Unused: the
            target is encoded directly in the cost terms below.

    Returns:
        cost (numpy.ndarray): cost scaled by
            CartPoleConfigModule.TERMINAL_WEIGHT. For input with more than
            one axis, the leading axes of terminal_x followed by a
            trailing axis of length 1; for a 1-D state, a scalar.
    """
    cost = (6. * (terminal_x[..., 0] ** 2)
            + 12. * ((np.cos(terminal_x[..., 2]) + 1.) ** 2)
            + 0.1 * (terminal_x[..., 1] ** 2)
            + 0.1 * (terminal_x[..., 3] ** 2))

    if terminal_x.ndim > 1:
        # keep a trailing singleton axis for batched input
        cost = cost[..., np.newaxis]

    return cost * CartPoleConfigModule.TERMINAL_WEIGHT
149
+
149
150
@staticmethod
150
151
def gradient_cost_fn_with_state(x, g_x, terminal=False):
    """ gradient of costs with respect to the state

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size) when
            terminal is False; indexed as a single 1-D state when
            terminal is True
        g_x (numpy.ndarray): goal state, shape(pred_len, state_size).
            Unused by the computation.
        terminal (bool): select the terminal-cost gradient

    Returns:
        l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
            or shape(1, state_size). Only the terminal gradient is
            multiplied by CartPoleConfigModule.TERMINAL_WEIGHT.
    """
    if not terminal:
        # one gradient row per prediction step, one column per component
        per_component = (
            12. * x[:, 0],
            0.2 * x[:, 1],
            24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2]),
            0.2 * x[:, 3],
        )
        return np.stack(per_component, axis=1)

    # terminal case: x is one state vector, result is a single row
    terminal_row = [
        12. * x[0],
        0.2 * x[1],
        24. * (1 + np.cos(x[2])) * -np.sin(x[2]),
        0.2 * x[3],
    ]
    cost_dx = np.array([terminal_row])

    return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
177
178
178
179
@staticmethod
@@ -206,21 +207,21 @@ def hessian_cost_fn_with_state(x, g_x, terminal=False):
206
207
hessian [:, 0 , 0 ] = 12.
207
208
hessian [:, 1 , 1 ] = 0.2
208
209
hessian [:, 2 , 2 ] = 24. * - np .sin (x [:, 2 ]) \
209
- * (- np .sin (x [:, 2 ])) \
210
- + 24. * (1. + np .cos (x [:, 2 ])) \
211
- * - np .cos (x [:, 2 ])
210
+ * (- np .sin (x [:, 2 ])) \
211
+ + 24. * (1. + np .cos (x [:, 2 ])) \
212
+ * - np .cos (x [:, 2 ])
212
213
hessian [:, 3 , 3 ] = 0.2
213
214
214
215
return hessian
215
-
216
+
216
217
state_size = len (x )
217
218
hessian = np .eye (state_size )
218
219
hessian [0 , 0 ] = 12.
219
220
hessian [1 , 1 ] = 0.2
220
221
hessian [2 , 2 ] = 24. * - np .sin (x [2 ]) \
221
- * (- np .sin (x [2 ])) \
222
- + 24. * (1. + np .cos (x [2 ])) \
223
- * - np .cos (x [2 ])
222
+ * (- np .sin (x [2 ])) \
223
+ + 24. * (1. + np .cos (x [2 ])) \
224
+ * - np .cos (x [2 ])
224
225
hessian [3 , 3 ] = 0.2
225
226
226
227
return hessian [np .newaxis , :, :] * CartPoleConfigModule .TERMINAL_WEIGHT
@@ -239,7 +240,7 @@ def hessian_cost_fn_with_input(x, u):
239
240
(pred_len , _ ) = u .shape
240
241
241
242
return np .tile (2. * CartPoleConfigModule .R , (pred_len , 1 , 1 ))
242
-
243
+
243
244
@staticmethod
244
245
def hessian_cost_fn_with_input_state(x, u):
    """ hessian costs with respect to the state and input

    The result is all zeros; only the shapes of x and u are read.

    Args:
        x (numpy.ndarray): state, 2-D; the second axis gives state_size
            (first axis presumably pred_len -- it is not checked)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_ux (numpy.ndarray): zeros,
            shape(pred_len, input_size, state_size)
    """
    state_size = x.shape[1]
    # tuple-unpacking keeps the original requirement that both arrays be 2-D
    (_, _) = x.shape
    (pred_len, input_size) = u.shape

    return np.zeros((pred_len, input_size, state_size))
0 commit comments