Skip to content

Commit 0d443f7

Browse files
author
Shunichi09
authored
Merge pull request #11 from Shunichi09/develop
Add nonlinear sample Env
2 parents 969fee7 + 8c28ff3 commit 0d443f7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1227
-561
lines changed

Environments.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
| Two wheeled System (Constant Goal) | x || 3 | 2 |
77
| Two wheeled System (Moving Goal) (Coming soon) | x || 3 | 2 |
88
| Cartpole (Swing up) | x || 4 | 1 |
9+
| Nonlinear Sample System Env | x || 2 | 1 |
10+
911

1012
## [FirstOrderLagEnv](PythonLinearNonlinearControl/envs/first_order_lag.py)
1113

@@ -53,4 +55,14 @@ mc = 1, mp = 0.2, l = 0.5, g = 9.81
5355

5456
### Cost.
5557

56-
<img src="assets/cartpole_score.png" width="300">
58+
<img src="assets/cartpole_score.png" width="300">
59+
60+
## [Nonlinear Sample System Env](PythonLinearNonlinearControl/envs/nonlinear_sample_system.py)
61+
62+
## System equation.
63+
64+
<img src="assets/nonlinear_sample_system.png" width="400">
65+
66+
### Cost.
67+
68+
<img src="assets/nonlinear_sample_system_score.png" width="400">

PythonLinearNonlinearControl/common/utils.py

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import numpy as np
22

3+
34
def rotate_pos(pos, angle):
45
""" Transformation the coordinate in the angle
5-
6+
67
Args:
78
pos (numpy.ndarray): local state, shape(data_size, 2)
89
angle (float): rotate angle, in radians
@@ -14,9 +15,10 @@ def rotate_pos(pos, angle):
1415

1516
return np.dot(pos, rot_mat.T)
1617

18+
1719
def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
1820
""" Check angle range and correct the range
19-
21+
2022
Args:
2123
angle (numpy.ndarray): in radians
2224
min_angle (float): minimum of range in radians, default -pi
@@ -29,7 +31,7 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
2931
if (max_angle - min_angle) < 2.0 * np.pi:
3032
raise ValueError("difference between max_angle \
3133
and min_angle must be greater than 2.0 * pi")
32-
34+
3335
output = np.array(angles)
3436
output_shape = output.shape
3537

@@ -41,4 +43,76 @@ def fit_angle_in_range(angles, min_angle=-np.pi, max_angle=np.pi):
4143
output += min_angle
4244

4345
output = np.minimum(max_angle, np.maximum(min_angle, output))
44-
return output.reshape(output_shape)
46+
return output.reshape(output_shape)
47+
48+
49+
def update_state_with_Runge_Kutta(state, u, functions, dt=0.01, batch=True):
    """ update state with the classical 4th-order Runge Kutta method

    Args:
        state (array-like): state of system,
            shape(state_size, ) when batch is False,
            shape(batch_size, state_size) when batch is True
        u (array-like): input of system, handed to each function unchanged
        functions (list): update function of each state,
            each function will be called like func(state, u)
            We expect that this function returns differential of each state
        dt (float): step width in seconds
        batch (bool): state and u is given by batch or not

    Returns:
        next_state (numpy.ndarray): next state of system

    Raises:
        AssertionError: if the number of functions differs from state size

    Notes:
        sample of function (non batch case) is as follows:

        def func_x_1(state, u):
            x_1, x_2 = state
            x_dot = (1. - x_1**2 - x_2**2) * x_2 - x_1 + u
            return x_dot

        Note that the function returns x_dot, not the next state.
        With batch=True the function receives the whole
        (batch_size, state_size) array and must return one value per row.
    """
    # accept lists / tuples as promised by the docstring
    state = np.asarray(state)

    if batch:
        batch_size, state_size = state.shape
        k_shape = (batch_size, state_size)
    else:
        state_size = len(state)
        k_shape = (state_size,)

    assert state_size == len(functions), \
        "Invalid functions length, You need to give the state size functions"

    def _increment(x):
        """ evaluate dt * f(x, u) for every state component """
        k = np.zeros(k_shape)
        for i, func in enumerate(functions):
            if batch:
                k[:, i] = dt * func(x, u)
            else:
                k[i] = dt * func(x, u)
        return k

    # four stages of RK4, each one built on the previous increment
    k0 = _increment(state)
    k1 = _increment(state + k0 / 2.)
    k2 = _increment(state + k1 / 2.)
    k3 = _increment(state + k2)

    return state + (k0 + 2. * k1 + 2. * k2 + k3) / 6.

PythonLinearNonlinearControl/configs/cartpole.py

Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22

3+
34
class CartPoleConfigModule():
45
# parameters
56
ENV_NAME = "CartPole-v0"
@@ -12,7 +13,7 @@ class CartPoleConfigModule():
1213
DT = 0.02
1314
# cost parameters
1415
R = np.diag([0.01]) # 0.01 is worked for MPPI and CEM and MPPIWilliams
15-
# 1. is worked for iLQR
16+
# 1. is worked for iLQR
1617
TERMINAL_WEIGHT = 1.
1718
Q = None
1819
Sf = None
@@ -39,41 +40,41 @@ def __init__(self):
3940
"num_elites": 50,
4041
"max_iters": 15,
4142
"alpha": 0.3,
42-
"init_var":9.,
43-
"threshold":0.001
43+
"init_var": 9.,
44+
"threshold": 0.001
4445
},
45-
"MPPI":{
46-
"beta" : 0.6,
46+
"MPPI": {
47+
"beta": 0.6,
4748
"popsize": 5000,
4849
"kappa": 0.9,
4950
"noise_sigma": 0.5,
5051
},
51-
"MPPIWilliams":{
52+
"MPPIWilliams": {
5253
"popsize": 5000,
5354
"lambda": 1.,
5455
"noise_sigma": 0.9,
5556
},
56-
"iLQR":{
57+
"iLQR": {
5758
"max_iter": 500,
5859
"init_mu": 1.,
5960
"mu_min": 1e-6,
6061
"mu_max": 1e10,
6162
"init_delta": 2.,
6263
"threshold": 1e-6,
63-
},
64-
"DDP":{
64+
},
65+
"DDP": {
6566
"max_iter": 500,
6667
"init_mu": 1.,
6768
"mu_min": 1e-6,
6869
"mu_max": 1e10,
6970
"init_delta": 2.,
7071
"threshold": 1e-6,
71-
},
72-
"NMPC-CGMRES":{
73-
},
74-
"NMPC-Newton":{
75-
},
76-
}
72+
},
73+
"NMPC-CGMRES": {
74+
},
75+
"NMPC-Newton": {
76+
},
77+
}
7778

7879
@staticmethod
7980
def input_cost_fn(u):
@@ -87,7 +88,7 @@ def input_cost_fn(u):
8788
shape(pop_size, pred_len, input_size)
8889
"""
8990
return (u**2) * np.diag(CartPoleConfigModule.R)
90-
91+
9192
@staticmethod
9293
def state_cost_fn(x, g_x):
9394
""" state cost function
@@ -103,21 +104,21 @@ def state_cost_fn(x, g_x):
103104
"""
104105

105106
if len(x.shape) > 2:
106-
return (6. * (x[:, :, 0]**2) \
107-
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2) \
108-
+ 0.1 * (x[:, :, 1]**2) \
109-
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
107+
return (6. * (x[:, :, 0]**2)
108+
+ 12. * ((np.cos(x[:, :, 2]) + 1.)**2)
109+
+ 0.1 * (x[:, :, 1]**2)
110+
+ 0.1 * (x[:, :, 3]**2))[:, :, np.newaxis]
110111

111112
elif len(x.shape) > 1:
112-
return (6. * (x[:, 0]**2) \
113-
+ 12. * ((np.cos(x[:, 2]) + 1.)**2) \
114-
+ 0.1 * (x[:, 1]**2) \
115-
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
116-
113+
return (6. * (x[:, 0]**2)
114+
+ 12. * ((np.cos(x[:, 2]) + 1.)**2)
115+
+ 0.1 * (x[:, 1]**2)
116+
+ 0.1 * (x[:, 3]**2))[:, np.newaxis]
117+
117118
return 6. * (x[0]**2) \
118-
+ 12. * ((np.cos(x[2]) + 1.)**2) \
119-
+ 0.1 * (x[1]**2) \
120-
+ 0.1 * (x[3]**2)
119+
+ 12. * ((np.cos(x[2]) + 1.)**2) \
120+
+ 0.1 * (x[1]**2) \
121+
+ 0.1 * (x[3]**2)
121122

122123
@staticmethod
123124
def terminal_state_cost_fn(terminal_x, terminal_g_x):
@@ -134,45 +135,45 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
134135
"""
135136

136137
if len(terminal_x.shape) > 1:
137-
return (6. * (terminal_x[:, 0]**2) \
138-
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2) \
139-
+ 0.1 * (terminal_x[:, 1]**2) \
140-
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
141-
* CartPoleConfigModule.TERMINAL_WEIGHT
142-
143-
return (6. * (terminal_x[0]**2) \
144-
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2) \
145-
+ 0.1 * (terminal_x[1]**2) \
146-
+ 0.1 * (terminal_x[3]**2)) \
138+
return (6. * (terminal_x[:, 0]**2)
139+
+ 12. * ((np.cos(terminal_x[:, 2]) + 1.)**2)
140+
+ 0.1 * (terminal_x[:, 1]**2)
141+
+ 0.1 * (terminal_x[:, 3]**2))[:, np.newaxis] \
147142
* CartPoleConfigModule.TERMINAL_WEIGHT
148-
143+
144+
return (6. * (terminal_x[0]**2)
145+
+ 12. * ((np.cos(terminal_x[2]) + 1.)**2)
146+
+ 0.1 * (terminal_x[1]**2)
147+
+ 0.1 * (terminal_x[3]**2)) \
148+
* CartPoleConfigModule.TERMINAL_WEIGHT
149+
149150
@staticmethod
150151
def gradient_cost_fn_with_state(x, g_x, terminal=False):
151152
""" gradient of costs with respect to the state
152153
153154
Args:
154155
x (numpy.ndarray): state, shape(pred_len, state_size)
155156
g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
156-
157+
157158
Returns:
158159
l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
159160
or shape(1, state_size)
160161
"""
161162
if not terminal:
162-
cost_dx0 = 12. * x[:, 0]
163+
cost_dx0 = 12. * x[:, 0]
163164
cost_dx1 = 0.2 * x[:, 1]
164165
cost_dx2 = 24. * (1 + np.cos(x[:, 2])) * -np.sin(x[:, 2])
165166
cost_dx3 = 0.2 * x[:, 3]
166-
cost_dx = np.stack((cost_dx0, cost_dx1,\
167+
cost_dx = np.stack((cost_dx0, cost_dx1,
167168
cost_dx2, cost_dx3), axis=1)
168169
return cost_dx
169-
170-
cost_dx0 = 12. * x[0]
170+
171+
cost_dx0 = 12. * x[0]
171172
cost_dx1 = 0.2 * x[1]
172173
cost_dx2 = 24. * (1 + np.cos(x[2])) * -np.sin(x[2])
173174
cost_dx3 = 0.2 * x[3]
174175
cost_dx = np.array([[cost_dx0, cost_dx1, cost_dx2, cost_dx3]])
175-
176+
176177
return cost_dx * CartPoleConfigModule.TERMINAL_WEIGHT
177178

178179
@staticmethod
@@ -206,21 +207,21 @@ def hessian_cost_fn_with_state(x, g_x, terminal=False):
206207
hessian[:, 0, 0] = 12.
207208
hessian[:, 1, 1] = 0.2
208209
hessian[:, 2, 2] = 24. * -np.sin(x[:, 2]) \
209-
* (-np.sin(x[:, 2])) \
210-
+ 24. * (1. + np.cos(x[:, 2])) \
211-
* -np.cos(x[:, 2])
210+
* (-np.sin(x[:, 2])) \
211+
+ 24. * (1. + np.cos(x[:, 2])) \
212+
* -np.cos(x[:, 2])
212213
hessian[:, 3, 3] = 0.2
213214

214215
return hessian
215-
216+
216217
state_size = len(x)
217218
hessian = np.eye(state_size)
218219
hessian[0, 0] = 12.
219220
hessian[1, 1] = 0.2
220221
hessian[2, 2] = 24. * -np.sin(x[2]) \
221-
* (-np.sin(x[2])) \
222-
+ 24. * (1. + np.cos(x[2])) \
223-
* -np.cos(x[2])
222+
* (-np.sin(x[2])) \
223+
+ 24. * (1. + np.cos(x[2])) \
224+
* -np.cos(x[2])
224225
hessian[3, 3] = 0.2
225226

226227
return hessian[np.newaxis, :, :] * CartPoleConfigModule.TERMINAL_WEIGHT
@@ -239,7 +240,7 @@ def hessian_cost_fn_with_input(x, u):
239240
(pred_len, _) = u.shape
240241

241242
return np.tile(2.*CartPoleConfigModule.R, (pred_len, 1, 1))
242-
243+
243244
@staticmethod
244245
def hessian_cost_fn_with_input_state(x, u):
245246
""" hessian costs with respect to the state and input
@@ -254,4 +255,4 @@ def hessian_cost_fn_with_input_state(x, u):
254255
(_, state_size) = x.shape
255256
(pred_len, input_size) = u.shape
256257

257-
return np.zeros((pred_len, input_size, state_size))
258+
return np.zeros((pred_len, input_size, state_size))

0 commit comments

Comments
 (0)