Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Hmzh/refactor costs p2 #102

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion example/CouplingExample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ stackelberg_leader_idx = 2
function coupling_example()

dt = 0.1
dyn = ShepherdAndSheepDynamics(dt)
cont_lin_dyn = ShepherdAndSheepDynamics()
dyn = discretize(cont_lin_dyn, dt)
costs = ShepherdAndSheepCosts()

# Initial condition chosen randomly. Ensure both have relatively low speed.
Expand Down
12 changes: 8 additions & 4 deletions example/ExampleMixedStateDynamics/SimpleKinematic1DDynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,20 @@ function generate_dynamics(a)

i = 1

A(t) = [1 0; t 1]
B(t) = [t; 0][:,:] # The multiplier here is the difference between states.
D(t) = [1 0; 0 1]
A = [0 0; 1. 0]
B = [1.; 0][:,:] # The multiplier here is the difference between states.
D = [1. 0; 0 1.]
cont_dyn = ContinuousLinearDynamics(A, [B], D)
# Propagates the state x across the time interval t_range = (t0, t) with control u and
# the extra input v, using the enclosing cont_dyn discretized at step t - t0.
# Returns x unchanged when both ends of the interval coincide.
function f_dynamics(t_range, x, u, v)
t0 = t_range[1]
t = t_range[2]
# The interval must not run backwards in time.
@assert t ≥ t0

# no change if the times are the same.
if t == t0 return x end

# Length of the interval; used as the discretization step below.
dt = t - t0
dyn = LinearDynamics(A(dt), [B(dt)], D(dt))
dyn = discretize(cont_dyn, dt)

# The control is scaled by the captured multiplier a and wrapped in a one-element list;
# v is flattened with v[:] before being handed to propagate_dynamics.
return propagate_dynamics(dyn, t_range, x, [a * u], v[:])
end
Expand Down
10 changes: 7 additions & 3 deletions example/ILQRExamples/RunILQRExample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ include("params_doubleintegrator_quadoffset.jl")
us_1 = zeros(udim(dyn), T)

# constant inputs - same as Jake
us_1[1,:] .= 0.1
us_1[2,:] .= 0.01
# us_1[1,:] .= 0.1
# us_1[2,:] .= 0.01

# Linearly interpolate.
# duration = (T-1) * dt
Expand All @@ -27,7 +27,11 @@ us_1[2,:] .= 0.01
# _, us_1 = unroll_feedback(dyn, times, ctrl_strats, x0)
# us_1 = us_1[1] + randn(size(us_1[1])) * 0.1

xs_i, us_i, is_converged, num_iters, conv_metrics, evaluated_costs = ilqr(T, t0, times, dyn, selected_cost, x0, us_1; max_iters=1000, step_size=0.01, threshold=1e-4, verbose=true)
max_iters=2
step_size=1.
threshold=1e-3
verbose=true
xs_i, us_i, is_converged, num_iters, conv_metrics, evaluated_costs = ilqr(T, t0, times, dyn, selected_cost, x0, us_1; max_iters, step_size, threshold, verbose)
final_cost_total = evaluate(selected_cost, xs_i, [us_i])

println("final: ", xs_i[:, T], " with trajectory cost: ", final_cost_total)
Expand Down
8 changes: 3 additions & 5 deletions example/ILQRExamples/RunLQRExample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,15 @@ include("params_time.jl")
include("params_doubleintegrator_quadoffset.jl")

time_range = (0.0, horizon)
dummy_us = [zeros(udim(dyn, ii)) for ii in 1:num_agents(dyn)]
lqr_quad_cost_x0 = quadraticize_costs(quad_w_offset_cost, time_range, x0, dummy_us)

# Solve optimal control problem.
println("initial state: ", x0')
println("desired state at time T: ", round.(xf', sigdigits=6), " over ", round(horizon, sigdigits=4), " seconds.")

ctrl_strats, _ = solve_lqr_feedback(dyn, lqr_quad_cost_x0, T)
xs_i, us_i = unroll_feedback(dyn, times, ctrl_strats, x0)
dummy_us = [zeros(udim(dyn, ii)) for ii in 1:num_agents(dyn)]
lqr_quad_cost_x0 = quadraticize_costs(quad_w_offset_cost, time_range, x0, dummy_us)

final_cost_total = evaluate(quad_w_offset_cost, xs_i, us_i)
final_cost_total = evaluate(selected_cost, xs_i, us_i)
println("final: ", xs_i[:, T], " with trajectory cost: ", final_cost_total)


Expand Down
38 changes: 28 additions & 10 deletions example/ILQRExamples/params_doubleintegrator_quadoffset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,40 @@ println()
#####################################
# Define the dynamics. #
#####################################
dyn = LinearDynamics([1. 0. dt 0.;
0. 1. 0. dt;
0. 0. 1. 0.;
0. 0. 0. 1.],
[vcat(zeros(2,2),
[dt 0; 0 dt])]) # 2d double integrator [x y xdot ydot]

cont_dyn = ContinuousLinearDynamics([0. 0. 1. 0.;
0. 0. 0. 1.;
0. 0. 0. 0.;
0. 0. 0. 0.],
[vcat(zeros(2,2),
[1. 0; 0 1.])]) # 2d double integrator [x y xdot ydot]
dyn = discretize(cont_dyn, dt)

#####################################
# Define the costs. #
#####################################
Q = Matrix(Diagonal(1*[1., 1., 1., 1.]))
R = Matrix(Diagonal(1*[1., 1.]))
quad_cost = QuadraticCost(Q)
add_control_cost!(quad_cost, 1, R)
quad_w_offset_cost = QuadraticCostWithOffset(quad_cost, xf)

# Make the quadratic cost to be used for LQR.
lqr_quad_cost = QuadraticCost(Q)
add_control_cost!(lqr_quad_cost, 1, R)

# Before adding offsets, copy it for use in the tracking quadratic cost.
quad_cost = deepcopy(lqr_quad_cost)

# Finish up the one for LQR.
add_offsets!(lqr_quad_cost, xf, get_zero_ctrls(dyn))

# default zero controls
us_1 = zeros(udim(dyn), T)

# constant inputs - same as Jake
us_1[1,:] .= 0.1
us_1[2,:] .= 0.01

# Zero-offsets for the one in the tracking cost.
add_offsets!(quad_cost, zeros(xdim(dyn)), get_zero_ctrls(dyn))
quad_w_offset_cost = make_quadratic_tracking_cost(quad_cost, xf, get_zero_ctrls(dyn))

println("setting cost to Quadratic Offset Cost")
selected_cost = quad_w_offset_cost
4 changes: 2 additions & 2 deletions example/ILQRExamples/params_unicycle_nonlinearexample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ println()
#####################################
# Define the dynamics. #
#####################################
dyn = UnicycleDynamics(num_players)
dyn = UnicycleDynamics(num_players, dt)


#####################################
Expand All @@ -28,7 +28,7 @@ Q = Matrix(Diagonal(1*[1., 1., 1., 1.]))
R = Matrix(Diagonal(1*[1., 1.]))
quad_cost = QuadraticCost(Q)
add_control_cost!(quad_cost, 1, R)
quad_w_offset_cost = QuadraticCostWithOffset(quad_cost, xf)
quad_w_offset_cost = QuadraticTrackingCost(quad_cost, xf)

# TODO(hmzh) - Implement this particular non-quadratic cost and test.
const_multiplier = 1.0
Expand Down
8 changes: 4 additions & 4 deletions example/ILQRExamples/params_unicycle_quadoffset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ println()
#####################################
# Define the dynamics. #
#####################################
dyn = UnicycleDynamics(num_players)

dyn = UnicycleDynamics(num_players, dt)

#####################################
# Define the costs. #
Expand All @@ -27,8 +26,9 @@ Q = Matrix(Diagonal(1*[1., 1., 1., 1.]))
R = Matrix(Diagonal(1*[1., 1.]))
quad_cost = QuadraticCost(Q)
add_control_cost!(quad_cost, 1, R)
quad_w_offset_cost = QuadraticCostWithOffset(quad_cost, xf)
add_offsets!(quad_cost, xf, get_zero_ctrls(dyn))
# quad_w_offset_cost = QuadraticTrackingCost(quad_cost, xf)

println("setting cost to Quadratic Offset Cost")
selected_cost = quad_w_offset_cost
selected_cost = quad_cost

5 changes: 3 additions & 2 deletions example/LeadershipFilteringExamples/leadfilt_LQ_parameters.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using StackelbergControlHypothesesFiltering

dt = 0.05
T = 151
T = 301
t0 = 0.0
horizon = T * dt
# TODO(hamzah) - We generate twice as many times as needed so that there's extra for the Stackelberg history. Make this tight.
times = dt * (cumsum(ones(2*T)) .- 1)

dyn = ShepherdAndSheepDynamics(dt)
cont_lin_dyn = ShepherdAndSheepDynamics()
dyn = discretize(cont_lin_dyn, dt)
costs = ShepherdAndSheepCosts()
num_players = num_agents(dyn)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ horizon = T * dt
# TODO(hamzah) - We generate twice as many times as needed so that there's extra for the Stackelberg history. Make this tight.
times = dt * (cumsum(ones(2*T)) .- 1)

dyn = ShepherdAndSheepWithUnicycleDynamics()
dyn = ShepherdAndSheepWithUnicycleDynamics(dt)
costs = UnicycleShepherdAndSheepWithQuadraticCosts()
num_players = num_agents(dyn)

Expand Down Expand Up @@ -69,7 +69,7 @@ max_iters = 50
step_size = 0.01

# Generate the ground truth.
costs = [QuadraticCostWithOffset(costs[1]), QuadraticCostWithOffset(costs[2])]
costs = [QuadraticTrackingCost(costs[1]), QuadraticTrackingCost(costs[2])]

# leader_idx=2
gt_silq_num_runs=1
Expand Down
6 changes: 3 additions & 3 deletions example/SILQGamesExamples/GenerateSILQGamesAnimation.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
include("RunSILQGamesOnLQExample.jl")
# include("RunSILQGamesOnQuadraticNonlinearGame.jl")
# include("RunSILQGamesOnLQExample.jl")
include("RunSILQGamesOnQuadraticNonlinearGame.jl")

using Plots
using ProgressBars
Expand Down Expand Up @@ -27,7 +27,7 @@ previous_GKSwstype = get(ENV, "GKSwstype", "")
ENV["GKSwstype"] = "100"

println("giffying...")
gif(anim, "silqgames_animation.gif", fps = 20)
gif(anim, "silqgames_animation.gif", fps = 50)
println("done")

# Speeds up call to gif (p.2/2) - https://discourse.julialang.org/t/why-is-my-animate-loop-super-slow/43685/4
Expand Down
3 changes: 2 additions & 1 deletion example/SILQGamesExamples/LQ_parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ T = 201
horizon = T * dt
times = dt * (cumsum(ones(T)) .- 1)

dyn = ShepherdAndSheepDynamics(dt)
cont_dyn = ShepherdAndSheepDynamics()
dyn = discretize(cont_dyn, dt)
costs = ShepherdAndSheepCosts()
num_players = num_agents(dyn)

Expand Down
2 changes: 1 addition & 1 deletion example/SILQGamesExamples/RunSILQGamesOnLQExample.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using LinearAlgebra

include("LQ_parameters.jl")

costs = [QuadraticCostWithOffset(costs[1]), QuadraticCostWithOffset(costs[2])]
costs = [QuadraticTrackingCost(costs[1]), QuadraticTrackingCost(costs[2])]

num_runs=1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ using LinearAlgebra

include("quadratic_nonlinear_parameters.jl")

costs = [QuadraticCostWithOffset(costs[1]), QuadraticCostWithOffset(costs[2])]
costs = [QuadraticTrackingCost(costs[1]), QuadraticTrackingCost(costs[2])]

num_runs=1

# config variables
threshold=0.001
max_iters=1000
step_size=1e-2
threshold=1e-4
max_iters=10000
step_size=1e-3
verbose=true

sg_obj = initialize_silq_games_object(num_runs, T, dyn, costs;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ T = 301
horizon = T * dt
times = dt * (cumsum(ones(T)) .- 1)

dyn = ShepherdAndSheepWithUnicycleDynamics()
dyn = ShepherdAndSheepWithUnicycleDynamics(dt)
costs = UnicycleShepherdAndSheepWithQuadraticCosts()
num_players = num_agents(dyn)

Expand Down
2 changes: 1 addition & 1 deletion src/StackelbergControlHypothesesFiltering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include("Utils.jl")

include("costs/CostUtils.jl")
include("costs/QuadraticCost.jl")
include("costs/QuadraticCostWithOffset.jl")
include("costs/QuadraticTrackingCost.jl")

include("costs/examples/ExampleILQRCost.jl")

Expand Down
59 changes: 57 additions & 2 deletions src/Utils.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,49 @@
# Utilities

# TODO(hamzah) - Combine split_vec and split functions.

# A function that splits 2D arrays into vectors of 2D arrays
# Use case. combining and splitting control inputs.
"""
    split(x, n)

Cut `x` along its first dimension into consecutive pieces whose lengths are the
entries of `n`. The lengths must add up to `size(x, 1)`.

One-dimensional inputs are forwarded to `split_vec`. Every other input produces a
`Vector{Matrix{eltype(x)}}` in which each piece keeps all columns of `x`.
"""
function split(x, n)
    # Vectors take the dedicated path so that the pieces come back as vectors.
    ndims(x) == 1 && return split_vec(x, n)

    @assert sum(n) == size(x, 1)

    blocks = Vector{Matrix{eltype(x)}}()
    row = firstindex(x)
    for num_rows in n
        last_row = row + num_rows - 1
        push!(blocks, x[row:last_row, :])
        row = last_row + 1
    end
    return blocks
end

"""
    split_vec(x::AbstractVector, n)

Cut the vector `x` into consecutive pieces whose lengths are the entries of `n`.
The lengths must add up to the length of `x`. Returns a `Vector{Vector{eltype(x)}}`.
"""
function split_vec(x::AbstractVector{T}, n) where {T}
    @assert sum(n) == size(x, 1)

    pieces = Vector{Vector{eltype(x)}}()
    lo = firstindex(x)
    for num_elems in n
        hi = lo + num_elems - 1
        push!(pieces, x[lo:hi])
        lo = hi + 1
    end
    return pieces
end

export split

# TODO(hmzh) Add a game class of some sort that ties together the system info, cost, and dynamics.
# Describes the sizes (and sampling time) of a multi-agent system.
struct SystemInfo
# Number of agents; this is the value num_agents(sys_info) returns.
num_agents::Int
# State size (presumably the value xdim reports - TODO confirm, xdim is not visible here).
num_x::Int
# Per-player control sizes; udim(sys_info, player_idx) reads num_us[player_idx].
num_us::AbstractArray{Int}
# The value vdim(sys_info) returns.
num_v::Int
dt::Float64 # If this is set to 0, the system is continuous.
end
# Three-argument form: forwards with 0 as the fourth argument.
# NOTE(review): the definition on the next line also creates a three-argument method
# (through its dt default) and therefore replaces this one.
SystemInfo(num_agents, num_x, num_us) = SystemInfo(num_agents, num_x, num_us, 0)
# Sets num_v to 0; dt defaults to 0. (continuous). A fourth positional argument is
# therefore taken as dt, not as num_v.
SystemInfo(num_agents, num_x, num_us, dt=0.) = SystemInfo(num_agents, num_x, num_us, 0, dt)
# Copies every field of si except dt, which is replaced by the given value.
SystemInfo(si::SystemInfo, dt) = SystemInfo(si.num_agents, si.num_x, si.num_us, si.num_v, dt)

function num_agents(sys_info::SystemInfo)
return sys_info.num_agents
Expand All @@ -26,11 +61,31 @@ function udim(sys_info::SystemInfo, player_idx)
return sys_info.num_us[player_idx]
end

"""
    udims(sys_info::SystemInfo)

Return the `num_us` field of `sys_info`, the collection of per-player control sizes.
"""
udims(sys_info::SystemInfo) = sys_info.num_us

"""
    get_zero_ctrls(sys_info::SystemInfo)

Return one all-zero control vector per agent; the vector for agent `ii` has length
`udim(sys_info, ii)`.
"""
function get_zero_ctrls(sys_info::SystemInfo)
    agent_indices = 1:num_agents(sys_info)
    return map(ii -> zeros(udim(sys_info, ii)), agent_indices)
end

"""
    vdim(sys_info::SystemInfo)

Return the `num_v` field of `sys_info`.
"""
vdim(sys_info::SystemInfo) = sys_info.num_v

export SystemInfo, num_agents, xdim, udim, vdim
"""
    sampling_time(sys_info::SystemInfo)

Return the `dt` field of `sys_info`.
"""
sampling_time(sys_info::SystemInfo) = sys_info.dt

"""
    is_continuous(sys_info::SystemInfo)

Return `true` when the sampling time of `sys_info` is zero, which is how a continuous
system is marked.
"""
function is_continuous(sys_info::SystemInfo)
    dt = sampling_time(sys_info)
    return iszero(dt)
end

"""
    get_discretized_system_info(sys_info::SystemInfo, new_dt)

Build a `SystemInfo` from `sys_info` and `new_dt` through the
`SystemInfo(::SystemInfo, dt)` constructor.
"""
get_discretized_system_info(sys_info::SystemInfo, new_dt) = SystemInfo(sys_info, new_dt)

export SystemInfo, num_agents, xdim, udim, udims, get_zero_ctrls, vdim, sampling_time, is_continuous, get_discretized_system_info


# Wraps angles to the range [-pi, pi).
Expand Down
10 changes: 5 additions & 5 deletions src/control_strategies/FeedbackGainControlStrategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
# Multiplayer control strategy stored as per-player feedback gains and constant terms.
# apply_control_strategy indexes Ps[ii][:, :, tt] and ps[ii][:, tt], so the last axis of
# each entry is the time index.
struct FeedbackGainControlStrategy <: MultiplayerControlStrategy
num_players::Int # number of players
horizon::Int # horizon
Ps::AbstractVector{<:AbstractArray{Float64, 3}} # linear feedback gains
ps::AbstractVector{<:AbstractMatrix{Float64}} # constant feedback terms
Ps # linear feedback gains
ps # constant feedback terms
end
# Convenience constructor: num_players is length(Ps) and horizon is size(Ps[1], 3).
# When ps is omitted it defaults to one zero matrix per player, sized
# (size(Ps[ii], 1), size(Ps[ii], 3)).
FeedbackGainControlStrategy(Ps::AbstractVector{<:AbstractArray{Float64, 3}},
ps::AbstractVector{<:AbstractMatrix{Float64}}=[zeros(size(Ps[ii], 1), size(Ps[ii], 3)) for ii in 1:length(Ps)]) = FeedbackGainControlStrategy(length(Ps), size(Ps[1], 3), Ps, ps)
# Same construction without type restrictions on Ps and ps.
FeedbackGainControlStrategy(Ps,
ps=[zeros(size(Ps[ii], 1), size(Ps[ii], 3)) for ii in 1:length(Ps)]) = FeedbackGainControlStrategy(length(Ps), size(Ps[1], 3), Ps, ps)

# This function accepts a feedback gain control strategy and applies it to a state at a given time (i.e. index).
# Returns one control per player: for player ii the result is
# -Ps[ii][:, :, tt] * x - ps[ii][:, tt], i.e. the gain and constant term at index tt.
function apply_control_strategy(tt::Int, strategy::FeedbackGainControlStrategy, x::AbstractArray{Float64})
function apply_control_strategy(tt::Int, strategy::FeedbackGainControlStrategy, x)
return [-strategy.Ps[ii][:, :, tt] * x - strategy.ps[ii][:, tt] for ii in 1:strategy.num_players]
end

Expand Down
Loading