forked from irom-princeton/PAC-Bayes-Control
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
404 lines (275 loc) · 13.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# Main script for "PAC-Bayes Control: Synthesizing Controllers that Provably Generalize to Novel Environments"
# Implements Relative Entropy Programming version of approach
# Requirements:
# - PyBullet: https://pybullet.org/wordpress/
# - SCS: https://github.com/cvxgrp/scs
import pybullet as pybullet
import numpy as np
import time
from optimize_PAC_bound import *
from utils_simulation import *
###########################################################################################
# Functions
# Utility function for computing KL divergence
def kl_divergence(p,q):
L = len(p)
kl_p_q = 0.0
for l in range(0,L):
if (p[l] > 1e-6): # if p[l] is close to 0, then 0*log(0) := 0
kl_p_q = kl_p_q + p[l]*np.log((p[l])/(q[l]))
return kl_p_q
# Compute cost function
def cost_fcn(p, p0, delta, precomputed_costs, pac_bayes):
# m: number of different environments (i.e., # of samples)
m = np.shape(precomputed_costs)[0]
# Compute average cost
costs_envs = np.matmul(precomputed_costs, p)
cost = np.mean(costs_envs)
if (pac_bayes): # If we're applying PAC-Bayes theory
# Now compute KL term
kl_p_p0 = kl_divergence(p,p0)
# "Regularizer" term (this holds for all [0,1]-bounded loss functions [Maurer '04])
cost_kl = np.sqrt((kl_p_p0 + np.log((2*np.sqrt(m))/delta))/(2*m))
cost = cost + cost_kl
return cost
# Precompute costs for different environments and controllers
def precompute_environment_costs(numEnvs, K, L, params, husky, sphere, GUI, seed):
# Parameters
numRays = params['numRays']
senseRadius = params['senseRadius']
robotRadius = params['robotRadius']
robotHeight = params['robotHeight']
thetas_nominal = params['thetas_nominal']
T_horizon = params['T_horizon']
# Fix random seed for consistency of results
np.random.seed(seed)
# Initialize costs for the different environments and different controllers
costs = np.zeros((numEnvs, L))
for env in range(0,numEnvs):
# Print
if (env%10 == 0):
print(env, "out of", numEnvs)
# Sample environment
heightObs = 20*robotHeight
obsUid = generate_obstacles(pybullet, heightObs, robotRadius)
for l in range(0,L):
# Initialize position of robot
state = [0.0, 1.0, 0.0] # [x, y, theta]
quat = pybullet.getQuaternionFromEuler([0.0, 0.0, state[2]+np.pi/2]) # pi/2 since Husky visualization is rotated by pi/2
pybullet.resetBasePositionAndOrientation(husky, [state[0], state[1], 0.0], quat)
pybullet.resetBasePositionAndOrientation(sphere, [state[0], state[1], robotHeight], [0,0,0,1])
# Cost for this particular controller (lth controller) in this environment
cost_env_l = 0.0
for t in range(0, T_horizon):
# Get sensor measurement
y = getDistances(pybullet, state, robotHeight, numRays, senseRadius, thetas_nominal)
# Compute control input
u = compute_control(y, K[l])
# Update state
state = robot_update_state(state, u)
# Update position of pybullet object
quat = pybullet.getQuaternionFromEuler([0.0, 0.0, state[2]+np.pi/2]) # pi/2 since Husky visualization is rotated by pi/2
pybullet.resetBasePositionAndOrientation(husky, [state[0], state[1], 0.0], quat)
pybullet.resetBasePositionAndOrientation(sphere, [state[0], state[1], robotHeight], [0,0,0,1])
if (GUI):
pybullet.resetDebugVisualizerCamera(cameraDistance=5.0, cameraYaw=0.0, cameraPitch=-45.0, cameraTargetPosition=[state[0], state[1], 2*robotHeight])
time.sleep(0.025)
# Check if the robot is in collision. If so, cost = 1.0.
# Get closest points. Note: Last argument is distance threshold. Since it's set to 0, the function will only return points if the distance is less than zero. So, closestPoints is non-empty iff there is a collision.
closestPoints = pybullet.getClosestPoints(sphere, obsUid, 0.0)
# See if the robot is in collision. If so, cost = 1.0.
if closestPoints: # Check if closestPoints is non-empty
cost_env_l = 1.0
break # break out of simulation for this environment
# Check that cost is between 0 and 1 (for sanity)
if (cost_env_l > 1.0):
raise ValueError("Cost is greater than 1!")
if (cost_env_l < 0.0):
raise ValueError("Cost is less than 0!")
# Record cost for this environment and this controller
costs[env][l] = cost_env_l
# Remove obstacles
pybullet.removeBody(obsUid)
return costs
# Compute expected cost (just a utility function)
def expected_cost(p, precomputed_costs):
# Compute average cost
costs_envs = np.matmul(precomputed_costs, p)
cost_mean = np.mean(costs_envs)
return cost_mean
# Compute control input
def compute_control(y, K):
# y is vector of depth measurements
u_diff = np.matmul(1./y, K)
return u_diff
def simulate_controller(numEnvs, K, p_opt, params, husky, sphere, GUI, seed):
# Parameters
numRays = params['numRays']
senseRadius = params['senseRadius']
robotRadius = params['robotRadius']
robotHeight = params['robotHeight']
thetas_nominal = params['thetas_nominal']
T_horizon = params['T_horizon']
# Fix random seed for consistency of results
np.random.seed(seed)
for env in range(0,numEnvs):
# Print
if (env%100 == 0):
print(env, "out of", numEnvs)
# Sample environment
heightObs = 20*robotHeight
obsUid = generate_obstacles(pybullet, heightObs, robotRadius)
# Choose controller by sampling
l = np.random.choice(range(0,len(p_opt_pac)), 1, p=p_opt_pac)
l = l[0]
# Initialize position of robot
state = [0.0, 1.0, 0.0] # [x, y, theta]
quat = pybullet.getQuaternionFromEuler([0.0, 0.0, state[2]+np.pi/2]) # pi/2 since Husky visualization is rotated by pi/2
pybullet.resetBasePositionAndOrientation(husky, [state[0], state[1], 0.0], quat)
pybullet.resetBasePositionAndOrientation(sphere, [state[0], state[1], robotHeight], [0,0,0,1])
for t in range(0, T_horizon):
# Get sensor measurement
y = getDistances(pybullet, state, robotHeight, numRays, senseRadius, thetas_nominal)
# Compute control input
u = compute_control(y, K[l])
# Update state
state = robot_update_state(state, u)
# print(state)
# Update position of pybullet object
quat = pybullet.getQuaternionFromEuler([0.0, 0.0, state[2]+np.pi/2]) # pi/2 since Husky visualization is rotated by pi/2
pybullet.resetBasePositionAndOrientation(husky, [state[0], state[1], 0.0], quat)
pybullet.resetBasePositionAndOrientation(sphere, [state[0], state[1], robotHeight], [0,0,0,1])
if (GUI):
pybullet.resetDebugVisualizerCamera(cameraDistance=5.0, cameraYaw=0.0, cameraPitch=-45.0, cameraTargetPosition=[state[0], state[1], 2*robotHeight])
time.sleep(0.025)
# Check if the robot is in collision. If so, cost = 1.0.
# Get closest points. Note: Last argument is distance threshold. Since it's set to 0, the function will only return points if the distance is less than zero. So, closestPoints is non-empty iff there is a collision.
closestPoints = pybullet.getClosestPoints(sphere, obsUid, 0.0)
# See if the robot is in collision. If so, cost = 1.0.
if closestPoints: # Check if closestPoints is non-empty
# cost_env_l = 1.0
break # break out of simulation for this environment
# Remove obstacles
pybullet.removeBody(obsUid)
return True
###########################################################################################
# Initial setup
# Flag that sets if things are visualized
# GUI = True; # Only for debugging purposes
GUI = False;
# PAC parameters
m = 100 # Number of environments to sample
delta = 0.01 # PAC failure probability
# Random seed (for consistency of results)
random_seed = 5
# pyBullet
if (GUI):
pybullet.connect(pybullet.GUI)
else:
pybullet.connect(pybullet.DIRECT)
# Get some robot parameters
params = get_parameters()
robotRadius = params['robotRadius']
numRays = params['numRays']
thetas_nominal = params['thetas_nominal']
# Ground plane
pybullet.loadURDF("./URDFs/plane.urdf")
# Load robot from URDF
husky = pybullet.loadURDF("./URDFs/husky.urdf", globalScaling=0.5)
# Sphere
colSphereId = pybullet.createCollisionShape(pybullet.GEOM_SPHERE, radius=robotRadius)
mass = 0
if (GUI):
visualShapeId = pybullet.createVisualShape(pybullet.GEOM_SPHERE, radius=robotRadius,rgbaColor=[0,0,0,0]) # This just makes sure that the sphere is not visible (we only use the sphere for collision checking)
else:
visualShapeId = -1
sphere = pybullet.createMultiBody(mass,colSphereId,visualShapeId)
################################################################################
# Controller and optimization setup
# Choose L controllers
num_x_intercepts = 5
num_y_intercepts = 10
L = num_x_intercepts*num_y_intercepts
x_intercepts = np.linspace(0.1, 5.0, num_x_intercepts)
y_intercepts = np.linspace(0.0, 10.0, num_y_intercepts)
K = L*[None]
for i in range(num_x_intercepts):
for j in range(num_y_intercepts):
K[i*num_y_intercepts + j] = np.zeros((numRays,1))
for r in range(numRays):
if (thetas_nominal[r] > 0):
K[i*num_y_intercepts + j][r] = y_intercepts[j]*(x_intercepts[i] - thetas_nominal[r])/x_intercepts[i]
else:
K[i*num_y_intercepts + j][r] = y_intercepts[j]*(-x_intercepts[i] - thetas_nominal[r])/x_intercepts[i]
################################################################################
# Prior for PAC-Bayes
p0 = L*[1.0/L] # Probability vector corresponding to prior (should sum to 1).
################################################################################
# Perform optimizations
# Do precomputations for optimization
print(" ")
print(" ")
print("Precomputing costs for environments (could/should easily be parallelized)...")
tic = time.time()
# Precompute costs for m different environments and L controllers
costs_precomputed = precompute_environment_costs(m, K, L, params, husky, sphere, GUI, random_seed)
toc = time.time()
time_precomputation = toc-tic
print("Done precomputing.")
# Optimize with PAC-Bayes bound
tic = time.time()
print("Optimizing controller (PAC-Bayes)...")
# Using Relative Entropy Programming
bound_REP, p_opt_pac, taus = optimize_PAC_bound(costs_precomputed, p0, delta)
# Save results
# np.savez('results_REP', p_opt_pac=p_opt_pac, K=K)
# Compute KL inverse bound
pac_bound = kl_inverse(expected_cost(p_opt_pac, costs_precomputed), (kl_divergence(p_opt_pac,p0)+np.log(2*np.sqrt(m)/delta))/m)
print("Done optimizing.")
toc = time.time()
time_optimization = toc-tic
################################################################################
################################################################################
# Estimate true expected cost
print("Precomputing costs for estimating true cost...")
# Precompute costs for different environments and controllers
numEnvs = 100 # 100000 were used in the paper (here, we're using few environments to speed things up)
seed = 10 # Different random seed
costs_precomputed_for_est = precompute_environment_costs(numEnvs, K, L, params, husky, sphere, GUI, seed)
print("Done precomputing.")
print("Estimating true cost for PAC-Bayes controller based on ", numEnvs, " environments...")
true_cost_pac = expected_cost(p_opt_pac, costs_precomputed_for_est)
print("Done estimating true cost.")
################################################################################
# Print results
print("Time for precomputation: ", time_precomputation)
print("Time for optimization: ", time_optimization)
print("Cost bound (PAC-Bayes): ", pac_bound)
print("True cost estimate (PAC-Bayes): ", true_cost_pac)
# Disconect from pybullet
pybullet.disconnect()
################################################################################
# Run optimized controller on some environments to visualize
# Flag that sets if things are visualized
GUI = True;
pybullet.connect(pybullet.GUI)
# Clean up probabilities
p_opt_pac = np.maximum(p_opt_pac, 0)
p_opt_pac = p_opt_pac/np.sum(p_opt_pac)
random_seed = 25 #
numEnvs = 10 # Number of environments to show videos for
# Ground plane
pybullet.loadURDF("./URDFs/plane.urdf")
# Load robot from URDF
husky = pybullet.loadURDF("./URDFs/husky.urdf", globalScaling=0.5)
# Sphere
colSphereId = pybullet.createCollisionShape(pybullet.GEOM_SPHERE, radius=robotRadius)
mass = 0
visualShapeId = pybullet.createVisualShape(pybullet.GEOM_SPHERE, radius=robotRadius,rgbaColor=[0,0,0,0]) # This just makes sure that the sphere is not visible (we only use the sphere for collision checking)
sphere = pybullet.createMultiBody(mass,colSphereId,visualShapeId)
print("Simulating optimized controller in a few environments...")
# Play videos
simulate_controller(numEnvs, K, p_opt_pac, params, husky, sphere, GUI, random_seed)
# Disconect from pybullet
pybullet.disconnect()
print("Done.")