diff --git a/README.md b/README.md index 22e9646..cd7b010 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,14 @@ The following is an indicative list of examples. - REINFORCE algorithm on ```CartPole-v0``` - Expected SARSA on ```CliffWalking-v0``` - Approximate Monte Carlo on ```MountainCar-v0``` -- Monte Carlo tree search on ```Taxi-v3``` +- Monte Carlo tree search on ```Taxi-v3``` - Double Q-learning on ```CartPole-v0``` - First visit Monte Carlo on ```FrozenLake-v0``` ### Filtering - Kalman filtering +- Extended Kalman filter for diff-drive system ### Path planning diff --git a/examples/filtering/CMakeLists.txt b/examples/filtering/CMakeLists.txt index 07a51d4..130e5d0 100644 --- a/examples/filtering/CMakeLists.txt +++ b/examples/filtering/CMakeLists.txt @@ -1,2 +1,3 @@ ADD_SUBDIRECTORY(filtering_example_1) +ADD_SUBDIRECTORY(filtering_example_2) diff --git a/examples/filtering/filtering_example_2/CMakeLists.txt b/examples/filtering/filtering_example_2/CMakeLists.txt new file mode 100644 index 0000000..c09580f --- /dev/null +++ b/examples/filtering/filtering_example_2/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.6 FATAL_ERROR) + +SET(EXECUTABLE filtering_example_2) +SET(SOURCE ${EXECUTABLE}.cpp) + +ADD_EXECUTABLE(${EXECUTABLE} ${SOURCE}) + +TARGET_LINK_LIBRARIES(${EXECUTABLE} cubeailib) + +IF( USE_PYTORCH ) + TARGET_LINK_LIBRARIES(${EXECUTABLE} ${TORCH_LIBRARIES}) +ENDIF() + +TARGET_LINK_LIBRARIES(${EXECUTABLE} pthread) +TARGET_LINK_LIBRARIES(${EXECUTABLE} openblas) +TARGET_LINK_LIBRARIES(${EXECUTABLE} rlenvscpplib) +target_link_libraries(${EXECUTABLE} tbb) +target_link_libraries(${EXECUTABLE} boost_log) diff --git a/examples/filtering/filtering_example_2/filtering_example_2.cpp b/examples/filtering/filtering_example_2/filtering_example_2.cpp new file mode 100755 index 0000000..eee917d --- /dev/null +++ b/examples/filtering/filtering_example_2/filtering_example_2.cpp @@ -0,0 +1,222 @@ +#include "cubeai/base/cubeai_types.h" +#include "cubeai/estimation/extended_kalman_filter.h" +#include "cubeai/utils/iteration_counter.h" +#include "cubeai/io/csv_file_writer.h" +#include "rlenvs/dynamics/diff_drive_dynamics.h" +#include "rlenvs/dynamics/system_state.h" +#include "rlenvs/utils/unit_converter.h" + +#include +#include +#include +#include +#include +#include + +namespace example_2 +{ + +using cubeai::real_t; +using cubeai::uint_t; +using cubeai::DynMat; +using cubeai::DynVec; +using cubeai::estimation::ExtendedKalmanFilter; +using cubeai::utils::IterationCounter; +using cubeai::io::CSVWriter; +using rlenvscpp::dynamics::DiffDriveDynamics; +using rlenvscpp::dynamics::SysState; + + +class ObservationModel +{ + +public: + + // the ExtendedKalmanFilter expects an exposed + // input_type + typedef DynVec input_type; + + ObservationModel(); + + // simply return the state + const DynVec evaluate(const DynVec& input)const; + + // get the H or M matrix + const DynMat& get_matrix(const std::string& name)const; + +private: + + DynMat H; + DynMat M; +}; + +ObservationModel::ObservationModel() + : + H(2, 3), + M(2, 2) +{ + H = DynMat::Zero(2,3); + M = DynMat::Zero(2,2); + H(0, 0) = 1.0; + H(1,1) = 1.0; + M(0,0) = 1.0; + M(1, 1) = 1.0; + +} + +const DynVec +ObservationModel::evaluate(const DynVec& input)const{ + return input; +} + +const DynMat& +ObservationModel::get_matrix(const std::string& name)const{ + if(name == "H"){ + return H; + } + else if(name == "M"){ + return M; + } + + throw std::logic_error("Invalid matrix name. 
Name "+name+ " not found"); +} + +const DynVec get_measurement(const SysState<3>& state){ + + DynVec result(2); + result[0] = state.get("X"); + result[1] = state.get("Y"); + return result; +} + + +} + +int main() { + + using namespace example_2; + + BOOST_LOG_TRIVIAL(info)<<"Starting example..."; + + uint_t n_steps = 300; + auto time = 0.0; + auto dt = 0.5; + + /// angular velocity + auto w = 0.0; + + /// linear velocity + auto vt = 1.0; + + std::array motion_control_error; + motion_control_error[0] = 0.0; + motion_control_error[1] = 0.0; + + DiffDriveDynamics exact_motion_model; + exact_motion_model.set_matrix_update_flag(false); + exact_motion_model.set_time_step(dt); + + DiffDriveDynamics motion_model; + motion_model.set_time_step(dt); + + std::map input; + input["v"] = 1.0; + input["w"] = 0.0; + input["errors"] = motion_control_error; + motion_model.initialize_matrices(input); + + ObservationModel observation; + + ExtendedKalmanFilter ekf(motion_model, observation); + + DynMat R = DynMat::Zero(2, 2); + R(0,0) = 1.0; + R(1, 1) = 1.0; + + DynMat Q = DynMat::Zero(2, 2); + Q(0,0) = 0.001; + Q(1, 1) = 0.001; + + DynMat P = DynMat::Zero(3, 3); + P(0, 0) = 1.0; + P(1, 1) = 1.0; + P(2, 2) = 1.0; + + ekf.with_matrix("P", P) + .with_matrix("R", R) + .with_matrix("Q", Q); + + CSVWriter writer("state"); + writer.open(); + std::vector names{"Time", "X_true", "Y_true", "Theta_true", "X", "Y", "Theta"}; + writer.write_column_names(names); + + try{ + + + BOOST_LOG_TRIVIAL(info)<<"Starting simulation... "< motion_input; + motion_input["v"] = vt; // we keep the velocity constant + motion_input["errors"] = motion_control_error; + + for(uint_t step=0; step < n_steps; ++step){ + + BOOST_LOG_TRIVIAL(info)<<"Simulation time: "< row(7, 0.0); + row[0] = time; + row[1] = exact_state.get("X"); + row[2] = exact_state.get("Y"); + row[3] = exact_state.get("Theta"); + row[4] = state.get("X"); + row[5] = state.get("Y"); + row[6] = state.get("Theta"); + writer.write_row(row); + + time += dt; + counter++; + } + + BOOST_LOG_TRIVIAL(info)<<"Finished example..."; + } + catch(std::runtime_error& e){ + std::cerr< +#include +#include +#include +#include +#include + +namespace example_2 +{ + +using cubeai::real_t; +using cubeai::uint_t; +using cubeai::DynMat; +using cubeai::DynVec; +using cubeai::estimation::ExtendedKalmanFilter; +using cubeai::utils::IterationCounter; +using cubeai::io::CSVWriter; +using rlenvscpp::dynamics::DiffDriveDynamics; +using rlenvscpp::dynamics::SysState; + + +class ObservationModel +{ + +public: + + // the ExtendedKalmanFilter expects an exposed + // input_type + typedef DynVec input_type; + + ObservationModel(); + + // simply return the state + const DynVec evaluate(const DynVec& input)const; + + // get the H or M matrix + const DynMat& get_matrix(const std::string& name)const; + +private: + + DynMat H; + DynMat M; +}; + +ObservationModel::ObservationModel() + : + H(2, 3), + M(2, 2) +{ + H = DynMat::Zero(2,3); + M = DynMat::Zero(2,2); + H(0, 0) = 1.0; + H(1,1) = 1.0; + M(0,0) = 1.0; + M(1, 1) = 1.0; + +} + +const DynVec +ObservationModel::evaluate(const DynVec& input)const{ + return input; +} + +const DynMat& +ObservationModel::get_matrix(const std::string& name)const{ + if(name == "H"){ + return H; + } + else if(name == "M"){ + return M; + } + + throw std::logic_error("Invalid matrix name. 
Name "+name+ " not found"); +} + +const DynVec get_measurement(const SysState<3>& state){ + + DynVec result(2); + result[0] = state.get("X"); + result[1] = state.get("Y"); + return result; +} + + +} + +int main() { + + using namespace example_2; + + BOOST_LOG_TRIVIAL(info)<<"Starting example..."; + + uint_t n_steps = 300; + auto time = 0.0; + auto dt = 0.5; + + /// angular velocity + auto w = 0.0; + + /// linear velocity + auto vt = 1.0; + + std::array motion_control_error; + motion_control_error[0] = 0.0; + motion_control_error[1] = 0.0; + + DiffDriveDynamics exact_motion_model; + exact_motion_model.set_matrix_update_flag(false); + exact_motion_model.set_time_step(dt); + + DiffDriveDynamics motion_model; + motion_model.set_time_step(dt); + + std::map input; + input["v"] = 1.0; + input["w"] = 0.0; + input["errors"] = motion_control_error; + motion_model.initialize_matrices(input); + + ObservationModel observation; + + ExtendedKalmanFilter ekf(motion_model, observation); + + DynMat R = DynMat::Zero(2, 2); + R(0,0) = 1.0; + R(1, 1) = 1.0; + + DynMat Q = DynMat::Zero(2, 2); + Q(0,0) = 0.001; + Q(1, 1) = 0.001; + + DynMat P = DynMat::Zero(3, 3); + P(0, 0) = 1.0; + P(1, 1) = 1.0; + P(2, 2) = 1.0; + + ekf.with_matrix("P", P) + .with_matrix("R", R) + .with_matrix("Q", Q); + + CSVWriter writer("state"); + writer.open(); + std::vector names{"Time", "X_true", "Y_true", "Theta_true", "X", "Y", "Theta"}; + writer.write_column_names(names); + + try{ + + + BOOST_LOG_TRIVIAL(info)<<"Starting simulation... "< motion_input; + motion_input["v"] = vt; // we keep the velocity constant + motion_input["errors"] = motion_control_error; + + for(uint_t step=0; step < n_steps; ++step){ + + BOOST_LOG_TRIVIAL(info)<<"Simulation time: "< row(7, 0.0); + row[0] = time; + row[1] = exact_state.get("X"); + row[2] = exact_state.get("Y"); + row[3] = exact_state.get("Theta"); + row[4] = state.get("X"); + row[5] = state.get("Y"); + row[6] = state.get("Theta"); + writer.write_row(row); + + time += dt; + counter++; + } + + BOOST_LOG_TRIVIAL(info)<<"Finished example..."; + } + catch(std::runtime_error& e){ + std::cerr< diff --git a/examples/rl/rl_example_10/rl_example_10.cpp b/examples/rl/rl_example_10/rl_example_10.cpp index de91511..a19f5ba 100755 --- a/examples/rl/rl_example_10/rl_example_10.cpp +++ b/examples/rl/rl_example_10/rl_example_10.cpp @@ -25,8 +25,8 @@ using cubeai::rl::algos::td::QLearningConfig; using cubeai::rl::policies::EpsilonDecayOption; using cubeai::rl::RLSerialAgentTrainer; using cubeai::rl::RLSerialTrainerConfig; -using rlenvs_cpp::envs::gymnasium::CliffWorldActionsEnum; -typedef rlenvs_cpp::envs::gymnasium::CliffWorld env_type; + +typedef rlenvscpp::envs::gymnasium::CliffWorld env_type; } diff --git a/examples/rl/rl_example_11/rl_example_11.cpp b/examples/rl/rl_example_11/rl_example_11.cpp index 329f86a..37d39d6 100755 --- a/examples/rl/rl_example_11/rl_example_11.cpp +++ b/examples/rl/rl_example_11/rl_example_11.cpp @@ -28,7 +28,6 @@ using cubeai::rl::algos::ac::A2CConfig; using cubeai::rl::algos::ac::A2CSolver; using cubeai::rl::RLSerialAgentTrainer; using cubeai::rl::RLSerialTrainerConfig; -using rlenvs_cpp::envs::gymnasium::CartPoleActionsEnum; // create the Action and the Critic networks @@ -132,7 +131,7 @@ CriticNetImpl::forward(torch_tensor_t state){ TORCH_MODULE(ActorNet); TORCH_MODULE(CriticNet); -typedef rlenvs_cpp::envs::gymnasium::CartPole env_type; +typedef rlenvscpp::envs::gymnasium::CartPole env_type; } diff --git a/examples/rl/rl_example_12/rl_example_12.cpp 
b/examples/rl/rl_example_12/rl_example_12.cpp index 63e1986..ad2e530 100755 --- a/examples/rl/rl_example_12/rl_example_12.cpp +++ b/examples/rl/rl_example_12/rl_example_12.cpp @@ -41,7 +41,7 @@ using cubeai::torch_tensor_t; using cubeai::rl::RLSerialAgentTrainer; using cubeai::rl::RLSerialTrainerConfig; using cubeai::rl::policies::EpsilonGreedyPolicy; -using rlenvs_cpp::envs::grid_world::Gridworld; +using rlenvscpp::envs::grid_world::Gridworld; const uint_t l1 = 64; const uint_t l2 = 150; diff --git a/examples/rl/rl_example_13/rl_example_13.cpp b/examples/rl/rl_example_13/rl_example_13.cpp index 76f4ca7..887dd59 100755 --- a/examples/rl/rl_example_13/rl_example_13.cpp +++ b/examples/rl/rl_example_13/rl_example_13.cpp @@ -43,7 +43,7 @@ using cubeai::rl::algos::pg::ReinforceConfig; using cubeai::rl::RLSerialAgentTrainer; using cubeai::rl::RLSerialTrainerConfig; using cubeai::maths::stats::TorchCategorical; -using rlenvs_cpp::envs::gymnasium::CartPole; +using rlenvscpp::envs::gymnasium::CartPole; const uint_t L1 = 4; diff --git a/examples/rl/rl_example_15/rl_example_15.cpp b/examples/rl/rl_example_15/rl_example_15.cpp index ea7360e..2940880 100755 --- a/examples/rl/rl_example_15/rl_example_15.cpp +++ b/examples/rl/rl_example_15/rl_example_15.cpp @@ -42,7 +42,7 @@ using cubeai::rl::RLSerialAgentTrainer; using cubeai::rl::RLSerialTrainerConfig; using cubeai::rl::policies::EpsilonGreedyPolicy; using cubeai::containers::ExperienceBuffer; -using rlenvs_cpp::envs::grid_world::Gridworld; +using rlenvscpp::envs::grid_world::Gridworld; const std::string EXPERIMENT_ID = "1"; @@ -176,7 +176,7 @@ int main(){ // initialize the environment using random mode std::unordered_map options; - options["mode"] = std::any(rlenvs_cpp::envs::grid_world::to_string(rlenvs_cpp::envs::grid_world::GridWorldInitType::RANDOM)); + options["mode"] = std::any(rlenvscpp::envs::grid_world::to_string(rlenvscpp::envs::grid_world::GridWorldInitType::RANDOM)); env.make("v0", options); diff --git a/examples/rl/rl_example_16/CMakeLists.txt b/examples/rl/rl_example_16/CMakeLists.txt new file mode 100644 index 0000000..ee2e4ca --- /dev/null +++ b/examples/rl/rl_example_16/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.6 FATAL_ERROR) + +SET(EXECUTABLE rl_example_16) +SET(SOURCE ${EXECUTABLE}.cpp) + +ADD_EXECUTABLE(${EXECUTABLE} ${SOURCE}) + +TARGET_LINK_LIBRARIES(${EXECUTABLE} cubeailib) + +IF( USE_PYTORCH ) + TARGET_LINK_LIBRARIES(${EXECUTABLE} ${TORCH_LIBRARIES}) +ENDIF() + +TARGET_LINK_LIBRARIES(${EXECUTABLE} pthread) +TARGET_LINK_LIBRARIES(${EXECUTABLE} openblas) +TARGET_LINK_LIBRARIES(${EXECUTABLE} rlenvscpplib) +target_link_libraries(${EXECUTABLE} tbb) +target_link_libraries(${EXECUTABLE} boost_log) + diff --git a/examples/rl/example_16/example_16.cpp b/examples/rl/rl_example_16/rl_example_16.cpp similarity index 86% rename from examples/rl/example_16/example_16.cpp rename to examples/rl/rl_example_16/rl_example_16.cpp index e1abc35..95ad42d 100755 --- a/examples/rl/example_16/example_16.cpp +++ b/examples/rl/rl_example_16/rl_example_16.cpp @@ -8,15 +8,9 @@ #include "cubeai/base/cubeai_types.h" -#include "cubeai/rl/algorithms/mc/mc_tree_search_base.h" -#include "cubeai/utils/array_utils.h" +//#include "cubeai/rl/algorithms/mc/mc_tree_search_solver.h" -#include "gymfcpp/gymfcpp_types.h" -#include "gymfcpp/taxi_env.h" -#include "gymfcpp/time_step.h" -#include "gymfcpp/gymfcpp_consts.h" - -#include +#include "rlenvs/envs/connect2/connect2_env.h" #include #include @@ -28,12 +22,13 @@ namespace example{ + /* using 
cubeai::real_t;
 using cubeai::uint_t;
 using cubeai::rl::algos::MCTreeSearchBase;
 using cubeai::rl::algos::MCTreeSearchConfig;
-using gymfcpp::Taxi;
+using rlenvscpp::envs::connect2::Connect2;
 
 typedef Taxi::time_step_type time_step_type;
 
@@ -43,10 +38,7 @@ const uint_t N_EPISODES = 20000;
 const uint_t N_ITRS_PER_EPISODE = 2000;
 const real_t TOL = 1.0e-8;
 
-/**
- * Implementation of Monte Carlo tree serach for the OpenAI-Gym
- * environment.
- */
+
 
 template
 class TaxiMCTreeSearch: public MCTreeSearchBase
 {
 
@@ -115,7 +107,8 @@ TaxiMCTreeSearch::simulate_node(std::shared_ptr node, env_type&
 
     return time_step;
 }
 
-
+*/
+/*
 template
 void
 TaxiMCTreeSearch::on_episode(){
 
@@ -188,7 +181,9 @@ TaxiMCTreeSearch::on_episode(){
 
     best_rewards_.push_back(sum_reward_);
 }
 
+*/
+/*
 template
 void
 TaxiMCTreeSearch::expand_node(std::shared_ptr node, env_type& env){
 
@@ -214,6 +209,7 @@ TaxiMCTreeSearch::backprop(std::shared_ptr node){
 
 }
 
+ */
 
 }
 
@@ -223,23 +219,8 @@ int main(){
 
     try{
 
-        Py_Initialize();
-        auto main_module = boost::python::import("__main__");
-        auto main_namespace = main_module.attr("__dict__");
-
-        Taxi env("v0", main_namespace, false);
-        env.make();
+
-        MCTreeSearchConfig config;
-        config.max_tree_depth = 512;
-        config.temperature = 1.0;
-        TaxiMCTreeSearch agent(config, env);
-        agent.train();
-
-    }
-    catch(const boost::python::error_already_set&)
-    {
-        PyErr_Print();
     }
     catch(std::exception& e){
         std::cout<<e.what()<<std::endl;

+In this example we discuss the Monte Carlo tree search algorithm, or MCTS for short.
+
+----
+**Remark**
+
+A concise presentation of the algorithm can be found in the following video: https://www.youtube.com/watch?v=UXW2yZndl7U
+
+The following videos from Udacity's Reinforcement Learning course also explain
+the MCTS algorithm:
+
+1. Monte Carlo Tree Search p1
+2. Monte Carlo Tree Search p2
+3. Monte Carlo Tree Search p3
+4. Monte Carlo Tree Search p4
+
+In this tutorial we will be applying MCTS to a toy environment, namely Connect2. The following video is a nice discussion
+of the application of MCTS to Connect2: Alpha Zero and Monte Carlo Tree Search.
+
+----
+
+Overall, the vanilla MCTS algorithm has four steps:
+
+1. Tree traversal
+2. Node expansion
+3. Rollout or random simulation
+4. Backward propagation
+
+Let's go over each one of these steps.
+
+### Tree traversal
+
+At this step the agent starts from the root node and recursively selects the best child node
+until a leaf node is reached. The selection of a node is typically done using the
+UCB (upper confidence bound) criterion, although you can use any criterion that is suitable for your application,
+as long as it balances exploration and exploitation. The UCB criterion is as follows
+
+$$UCB = \frac{W}{N} + T\sqrt{\frac{\ln N_{parent}}{N}}$$
+
+where $N$ is the number of visits of the node, $N_{parent}$ is the number of visits of its parent,
+$W$ is the total score accumulated at the node and $T$ is a temperature parameter that controls the amount of exploration.
+The UCB criterion limits the extent to which each branch will be explored: as $N$ increases, because we visit the node more and more,
+the exploration term continuously decreases and thus the incentive to revisit the current branch also decreases.
+
+A new node is created when we execute the rollout step.
+
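To make the selection rule concrete, here is a minimal, self-contained C++ sketch of UCB-based child selection. It is not the cubeai API; the `Node` type and the function names below are illustrative only, and the score simply mirrors the formula above: win percentage plus a temperature-weighted exploration bonus that shrinks as the node is visited more often.

```cpp
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

struct Node {
    double total_score  = 0.0;     // accumulated reward from rollouts through this node
    int    total_visits = 0;       // how many times this node has been visited
    Node*  parent       = nullptr;
    std::vector<Node*> children;

    // UCB score: exploitation (win percentage) plus temperature-weighted exploration bonus
    double ucb(double temperature) const {
        if (total_visits == 0) {
            return std::numeric_limits<double>::infinity();   // always try unvisited nodes first
        }
        const double exploit = total_score / total_visits;
        const double explore = std::sqrt(std::log(static_cast<double>(parent->total_visits)) / total_visits);
        return exploit + temperature * explore;
    }
};

// Return the child with the highest UCB score.
Node* max_ucb_child(const Node& node, double temperature) {
    Node* best = nullptr;
    double best_score = -std::numeric_limits<double>::infinity();
    for (Node* child : node.children) {
        const double score = child->ucb(temperature);
        if (score > best_score) {
            best_score = score;
            best = child;
        }
    }
    return best;
}

int main() {
    Node root;
    root.total_visits = 10;
    Node a{3.0, 5, &root};   // visited often, decent win rate
    Node b{1.0, 1, &root};   // rarely visited, so it gets a large exploration bonus
    root.children = {&a, &b};

    std::cout << "selected child UCB: " << max_ucb_child(root, 1.0)->ucb(1.0) << std::endl;
    return 0;
}
```

With a temperature of 1.0 the rarely visited child wins the comparison, which is exactly the behaviour the exploration term is meant to produce.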
+### Rollout step
+
+MCTS starts with an empty tree data structure and iteratively builds a portion of the tree by running a number of simulations.
+Each simulation adds a single node to the tree. Since a simulation is the exploration step, the more simulations we run, the better
+we expect the model to perform. However, simulating indefinitely is not possible for obvious reasons.
+
+The MCTS algorithm assumes that we have some way of performing the simulation step.
+One way is to know the transition dynamics of the environment; another is to be able to sample from the environment.
+
+### Backward propagation
+
+During the backward step we update the statistics for every node in the search path. These statistics affect the UCB score and will
+guide the next MCTS simulation.
+
+Now that we have gone over the details of the MCTS algorithm, let's try to answer some practical questions. For example, when does the algorithm stop?
+There are various ways to address this question; the most common one is to simulate up to a specific number of iterations.
+
+All in all, the MCTS algorithm is summarised by the four steps given above.
+
+In this example we apply MCTS to the ``Connect2`` environment.
+
+## Driver code
+
+
+## References
+
+
diff --git a/examples/rl/rl_example_19/rl_example_19.cpp b/examples/rl/rl_example_19/rl_example_19.cpp
index 08ffdcd..7b2724d 100755
--- a/examples/rl/rl_example_19/rl_example_19.cpp
+++ b/examples/rl/rl_example_19/rl_example_19.cpp
@@ -29,8 +29,8 @@ using cubeai::rl::algos::mc::FirstVisitMCSolver;
 using cubeai::rl::algos::mc::FirstVisitMCSolverConfig;
 using cubeai::rl::RLSerialAgentTrainer;
 using cubeai::rl::RLSerialTrainerConfig;
-using rlenvs_cpp::envs::gymnasium::FrozenLake;
-using rlenvs_cpp::envs::gymnasium::FrozenLakeActionsEnum;
+using rlenvscpp::envs::gymnasium::FrozenLake;
+using rlenvscpp::envs::gymnasium::FrozenLakeActionsEnum;
 
 const std::string SERVER_URL = "http://0.0.0.0:8001/api";
 const uint_t SEED = 42;
@@ -78,7 +78,7 @@ std::vector::time_step_type>
 TrajectoryGenerator::operator()(FrozenLake<4>& env, uint_t max_steps){
 
     RandomActionSelector action_selector;
-    return rlenvs_cpp::envs::create_trajectory(env, action_selector, max_steps );
+    return rlenvscpp::envs::create_trajectory(env, action_selector, max_steps );
 
 }
diff --git a/examples/rl/rl_example_6/rl_example_6.cpp b/examples/rl/rl_example_6/rl_example_6.cpp
index a8a0263..8ba542e 100755
--- a/examples/rl/rl_example_6/rl_example_6.cpp
+++ b/examples/rl/rl_example_6/rl_example_6.cpp
@@ -25,7 +25,7 @@ using cubeai::rl::algos::dp::IterativePolicyEvalutationSolver;
 using cubeai::rl::algos::dp::IterativePolicyEvalConfig;
 using cubeai::rl::RLSerialAgentTrainer;
 using cubeai::rl::RLSerialTrainerConfig;
-using rlenvs_cpp::envs::gymnasium::FrozenLake;
+using rlenvscpp::envs::gymnasium::FrozenLake;
 
 typedef FrozenLake<4> env_type;
 
 }
diff --git a/examples/rl/rl_example_7/rl_example_7.cpp b/examples/rl/rl_example_7/rl_example_7.cpp
index a084c01..1c45b77 100755
--- a/examples/rl/rl_example_7/rl_example_7.cpp
+++ b/examples/rl/rl_example_7/rl_example_7.cpp
@@ -30,7 +30,7 @@ using cubeai::rl::algos::dp::PolicyIterationSolver;
 using cubeai::rl::algos::dp::PolicyIterationConfig;
 using cubeai::rl::RLSerialAgentTrainer;
 using cubeai::rl::RLSerialTrainerConfig;
-using rlenvs_cpp::envs::gymnasium::FrozenLake;
+using rlenvscpp::envs::gymnasium::FrozenLake;
 
 typedef FrozenLake<4> env_type;
 
 }
diff --git a/examples/rl/rl_example_8/rl_example_8.cpp b/examples/rl/rl_example_8/rl_example_8.cpp
index d58b983..3657825 100755
--- a/examples/rl/rl_example_8/rl_example_8.cpp
+++ b/examples/rl/rl_example_8/rl_example_8.cpp
@@ -23,7 +23,7 @@ using cubeai::rl::algos::dp::ValueIterationConfig;
 using cubeai::rl::RLSerialAgentTrainer;
 using cubeai::rl::RLSerialTrainerConfig;
 
-using rlenvs_cpp::envs::gymnasium::FrozenLake;
+using rlenvscpp::envs::gymnasium::FrozenLake;
 
 typedef FrozenLake<4> env_type;
 typedef ValueIteration; /// + /// Float type vector /// - /// - using FloatVec = DynVec; + using FloatVec = DynVec; + + /// + /// Real type vector + /// + using RealVec =
DynVec; /// /// \brief Null type. Simple placeholder diff --git a/include/cubeai/estimation/extended_kalman_filter.h b/include/cubeai/estimation/extended_kalman_filter.h new file mode 100644 index 0000000..032eb85 --- /dev/null +++ b/include/cubeai/estimation/extended_kalman_filter.h @@ -0,0 +1,324 @@ +#ifndef EXTENDED_KALMAN_FILTER_H +#define EXTENDED_KALMAN_FILTER_H + +#include "cubeai/base/cubeai_types.h" + +#include +#include +#include +#include + +namespace cubeai{ +namespace estimation{ + +/// +/// Implements the Extended Kalman filter algorithm. +/// The class expects a number of inputs in order to be useful. Concretely +/// the application must specity +/// +/// MotionModelTp a motion model +/// ObservationModelTp observation model +/// +/// The MotionModelTp should expose the following functions +/// +/// evaluate(MotionModelTp input)-->State& +/// get_matrix(const std::string)--->DynMat +/// +/// In particular, the class expects the L, F matrices to be supplied via the +/// get_matix function of the motion model. +/// +/// Similarly, the ObservationModelTp should expose the following functions +/// +/// evaluate(ObservationModelTp& input)--->DynVec +/// get_matrix(const std::string)--->DynMat +/// +/// In particular, the class expects the M, H matrices to be supplied via the +/// get_matix function of the observation model. +/// +/// Finally, the application should supply the P, Q, R matrices associated +/// with the process +/// + +template +class ExtendedKalmanFilter: private boost::noncopyable +{ +public: + + typedef MotionModelTp motion_model_type; + typedef ObservationModelTp observation_model_type; + typedef typename motion_model_type::input_type motion_model_input_type; + typedef typename motion_model_type::matrix_type matrix_type; + typedef typename motion_model_type::state_type state_type; + typedef typename observation_model_type::input_type observation_model_input_type; + + /// \brief Constructor + ExtendedKalmanFilter(); + + /// + /// \brief Constructor. Initialize with a writable reference + /// of the motion model and a read only reference of the observation model + /// + ExtendedKalmanFilter(motion_model_type& motion_model, + const observation_model_type& observation_model); + + /// + /// \brief Destructor + /// + ~ExtendedKalmanFilter(); + + /// \brief Estimate the state. This function simply + /// wraps the predict and update steps described by the + /// functions below + void estimate(const std::tuple& input ); + + /// \brief Predicts the state vector x and the process covariance matrix P using + /// the given input control u accroding to the following equations + /// + /// \hat{x}_{k = f(x_{k-1}, u_{k}, w_k) + /// \hat{P}_{k} = F_{k-1} * P_{k-1} * F_{k-1}^T + L_{k-1} * Q_{k-1} * L_{k-1}^T + /// + /// where x_{k-1} is the state at the previous step, u_{k} + /// is the control signal and w_k is the error associated with the + /// control signal. In input argument passed to the function is meant + /// to model in a tuple all the arguments needed. 
F, L are Jacobian matrices + /// and Q is the covariance matrix associate with the control signal + void predict(const motion_model_input_type& input); + + /// \brief Updates the gain matrix K, the state vector x and covariance matrix P + /// using the given measurement z_k according to the following equations + /// + /// K_k = \hat{P}_{k} * H_{k}^T * (H_k * \hat{P}_{k} * H_{k}^T + M_k * R_k * M_k^T)^{-1} + /// x_k = \hat{x}_{k} + K_k * (z_k - h( \hat{x}_{k}, 0)) + /// P_k = (I - K_k * H_k) * \hat{P}_{k} + void update(const observation_model_input_type& z); + + /// \brief Set the motion model + void set_motion_model(motion_model_type& motion_model) + {motion_model_ptr_ = &motion_model;} + + /// \brief Set the observation model + void set_observation_model(const observation_model_type& observation_model) + {observation_model_ptr_ = &observation_model;} + + /// \brief Set the matrix used by the filter + void set_matrix(const std::string& name, const matrix_type& mat); + + /// \brief Returns true if the matrix with the given name exists + bool has_matrix(const std::string& name)const; + + /// \brief Returns the state + const state_type& get_state()const{return motion_model_ptr_->get_state();} + + /// \brief Returns the state + state_type& get_state(){return motion_model_ptr_->get_state();} + + /// \brief Returns the state property with the given name + real_t get(const std::string& name)const{return motion_model_ptr_->get_state_property(name);} + + /// \brief Returns the name-th matrix + const DynMat& operator[](const std::string& name)const; + + /// \brief Returns the name-th matrix + DynMat& operator[](const std::string& name); + + /// + /// \brief Set the matrix and return *this + /// + ExtendedKalmanFilter& with_matrix(const std::string& name, const matrix_type& mat); + +protected: + + /// \brief pointer to the function that computes f + motion_model_type* motion_model_ptr_; + + /// \brief pointer to the function that computes h + const observation_model_type* observation_model_ptr_; + + /// \brief Matrices used by the filter internally + std::map matrices_; + +}; + +template +ExtendedKalmanFilter::ExtendedKalmanFilter() + : + motion_model_ptr_(nullptr), + observation_model_ptr_(nullptr) +{} + +template +ExtendedKalmanFilter::ExtendedKalmanFilter(motion_model_type& motion_model, + const observation_model_type& observation_model) + : + motion_model_ptr_(&motion_model), + observation_model_ptr_(&observation_model) +{} + +template +ExtendedKalmanFilter::~ExtendedKalmanFilter() +{} + +template +void +ExtendedKalmanFilter::set_matrix(const std::string& name, + const matrix_type& mat){ + + if(name != "Q" && name != "K" && name != "R" && name != "P"){ + throw std::logic_error("Invalid matrix name. 
Name: "+ + name+ + " not in [Q, K, R, P]"); + } + + matrices_.insert_or_assign(name, mat); +} + +template +ExtendedKalmanFilter& +ExtendedKalmanFilter::with_matrix(const std::string& name, + const matrix_type& mat){ + set_matrix(name, mat); + return *this; +} + +template +bool +ExtendedKalmanFilter::has_matrix(const std::string& name)const{ + + auto itr = matrices_.find(name); + return itr != matrices_.end(); +} + +template +const DynMat& +ExtendedKalmanFilter::operator[](const std::string& name)const{ + + auto itr = matrices_.find(name); + + if(itr == matrices_.end()){ + throw std::invalid_argument("Matrix: "+name+" does not exist"); + } + + return itr->second; +} + +template +DynMat& +ExtendedKalmanFilter::operator[](const std::string& name){ + + auto itr = matrices_.find(name); + + if(itr == matrices_.end()){ + throw std::invalid_argument("Matrix: "+name+" does not exist"); + } + + return itr->second; +} + + +template +void +ExtendedKalmanFilter::estimate(const std::tuple& input ){ + + predict(input.template get<0>()); + update(input.template get<1>()); +} + +template +void +ExtendedKalmanFilter::predict(const motion_model_input_type& u){ + + /// make a state prediction using the + /// motion model + motion_model_ptr_->evaluate(u); + + auto& P = (*this)["P"]; + auto& Q = (*this)["Q"]; + + auto& L = motion_model_ptr_->get_matrix("L"); + auto L_T = L.transpose(); //trans(L); + + auto& F = motion_model_ptr_->get_matrix("F"); + auto F_T = F.transpose(); //trans(F); + + P = F * P * F_T + L*Q*L_T; +} + +template +void +ExtendedKalmanFilter::update(const observation_model_input_type& z){ + + auto& state = motion_model_ptr_->get_state(); + auto& P = (*this)["P"]; + auto& R = (*this)["R"]; + + auto zpred = observation_model_ptr_->evaluate(z); + + auto& H = observation_model_ptr_->get_matrix("H"); + auto H_T = H.transpose(); //trans(H); + + // compute \partial{h}/\partial{v} the jacobian of the observation model + // w.r.t the error vector + auto& M = observation_model_ptr_->get_matrix("M"); + auto M_T = M.transpose(); //trans(M); + + try{ + + // S = H*P*H^T + M*R*M^T + auto S = H*P*H_T + M*R*M_T; + + auto S_inv = S.inverse(); //inv(S); + + if(has_matrix("K")){ + auto& K = (*this)["K"]; + K = P*H_T*S_inv; + } + else{ + auto K = P*H_T*S_inv; + set_matrix("K", K); + } + + auto& K = (*this)["K"]; + + auto innovation = z - zpred; + + // we need to take the transpose + auto innovation_t = innovation.transpose(); + + if(K.cols() != innovation_t.rows()){ + throw std::runtime_error("Matrix columns: "+ + std::to_string(K.cols())+ + " not equal to vector size: "+ + std::to_string(innovation_t.rows())); + } + + //auto vec = K * innovation_t; + state += K * innovation_t; //.add(K*innovation); + + //IdentityMatrix I(state.size()); + auto I = matrix_type::Identity(state.size(), state.size()); + /// update the covariance matrix + P = (I - K*H)*P; + } + catch(...){ + + // this is a singular matrix what + // should we do? 
Simply use the predicted + // values and log the fact that there was a singular matrix + + throw; + } +} + + +} +} + + +#endif \ No newline at end of file diff --git a/include/cubeai/io/csv_file_writer.h b/include/cubeai/io/csv_file_writer.h index 7ba8de4..6d08af5 100755 --- a/include/cubeai/io/csv_file_writer.h +++ b/include/cubeai/io/csv_file_writer.h @@ -60,10 +60,9 @@ class CSVWriter: public FileWriterBase template void write_row(const DynVec& vals); - /** - * - * @brief Write the given vector as a column - */ + /// + /// \brief Write the given vector as a column + /// template void write_column_vector(const std::vector& vals); diff --git a/include/cubeai/rl/algorithms/mc/mc_tree_search_base.h b/include/cubeai/rl/algorithms/mc/mc_tree_search_base.h deleted file mode 100644 index 7a3b80f..0000000 --- a/include/cubeai/rl/algorithms/mc/mc_tree_search_base.h +++ /dev/null @@ -1,398 +0,0 @@ -#ifndef MC_TREE_SEARCH_BASE_H -#define MC_TREE_SEARCH_BASE_H - -#include "cubeai/base/cubeai_types.h" -#include "cubeai/rl/algorithms/algorithm_base.h" -#include "cubeai/rl/algorithms/rl_algo_config.h" -#include "cubeai/utils/iteration_mixin.h" - -#include "cubeai/base/cubeai_config.h" - -#ifdef CUBEAI_DEBUG -#include -#endif - -#include -#include -#include -#include - -namespace cubeai{ -namespace rl{ -namespace algos{ - - -/// -/// \brief The MCTreeSearchConfig struct -/// -struct MCTreeSearchConfig: public RLAlgoConfig -{ - uint_t max_tree_depth = 1000; - real_t temperature = 1.0; - -}; - -template -class MCTreeNodeBase -{ -public: - - typedef ActionTp action_type; - typedef StateTp state_type; - - /// - /// \brief MCTreeNodeBase - /// \param parent - /// \param action - /// - MCTreeNodeBase(std::shared_ptr parent, action_type action); - - /// - /// - /// - virtual ~MCTreeNodeBase()=default; - - /// - /// \brief add_child - /// \param child - /// - void add_child(std::shared_ptr> child); - - /// - /// \brief add_child - /// \param parent - /// \param action - /// - void add_child(std::shared_ptr> parent, action_type action); - - /// - /// \brief has_children - /// \return - /// - bool has_children()const noexcept{return children_.empty() != true;} - - /// - /// \brief get_child - /// \param cidx - /// \return - /// - std::shared_ptr get_child(uint_t cidx){return children_[cidx];} - - /// - /// \brief n_children - /// \return - /// - uint_t n_children()const noexcept{return children_.size();} - - /// - /// \brief shuffle_children - /// - void shuffle_children()noexcept; - - /// - /// \brief explored_children - /// \return - /// - uint_t n_explored_children()const noexcept{return explored_children_;} - - /// - /// \brief update_visits - /// - void update_visits()noexcept{total_visits_ += 1;} - - /// - /// - /// - void update_explored_children()noexcept{explored_children_ += 1;} - - /// - /// \brief update_total_score - /// \param score - /// - void update_total_score(real_t score)noexcept {total_score_ += score;} - - /// - /// \brief ucb - /// \param temperature - /// \return - /// - real_t ucb(real_t temperature )const; - - /// - /// \brief max_ucb_child - /// \return - /// - std::shared_ptr max_ucb_child(real_t temperature)const; - - /// - /// \brief win_pct - /// \return - /// - real_t win_pct()const{return total_score_ / total_visits_ ;} - - /// - /// \brief total_visits - /// \return - /// - uint_t total_visits()const noexcept{return total_visits_;} - - /// - /// \brief get_action - /// \return - /// - uint_t get_action()const noexcept{return action_;} - - /// - /// \brief parent - /// \return - 
/// - std::shared_ptr parent(){return parent_;} - -protected: - - - - /// - /// \brief total_score_ - /// - real_t total_score_; - - /// - /// \brief total_visits_ - /// - uint_t total_visits_; - - /// - /// \brief explored_children_ - /// - uint_t explored_children_; - - /// - /// \brief action_ - /// - action_type action_; - - /// - /// \brief parent_ - /// - std::shared_ptr parent_; - - /// - /// \brief children_ - /// - std::vector> children_; - -}; - -template -MCTreeNodeBase::MCTreeNodeBase(std::shared_ptr parent, action_type action) - : - total_score_(0.0), - total_visits_(0), - explored_children_(0), - action_(action), - parent_(parent), - children_() -{} - - -template -void -MCTreeNodeBase::add_child(std::shared_ptr> child){ - -#ifdef CUBEAI_DEBUG - assert(child != nullptr && "Cannot add null children"); -#endif - - children_.push_back(child); - -} - -template -void -MCTreeNodeBase::shuffle_children()noexcept{ - - if(children_.empty()){ - return; - } - - std::random_shuffle(children_.begin(), children_.end()); -} - -template -void -MCTreeNodeBase::add_child(std::shared_ptr> parent, action_type action){ - - add_child(std::make_shared>(parent, action)); -} - -template -std::shared_ptr> -MCTreeNodeBase::max_ucb_child(real_t temperature)const{ - - if(children_.empty()){ - return std::shared_ptr(); - } - - auto current_ucb = children_[0]->ucb(temperature); - auto current_idx = 0; - auto dummy_idx = 0; - for(auto node : children_){ - - auto node_ucb = node->ucb(temperature); - - if(node_ucb > current_ucb){ - - current_idx = dummy_idx; - current_ucb = node_ucb; - } - - dummy_idx += 1; - } - - return children_[current_idx]; -} - -template -real_t -MCTreeNodeBase::ucb(real_t temperature )const{ - - return win_pct() + temperature * std::sqrt(std::log(parent_ -> total_visits()) / total_visits_); -} - -/// -/// \brief MCTreeSearchBase. -/// -template> -class MCTreeSearchBase: public AlgorithmBase -{ -public: - - static_assert (std::is_default_constructible::value, "Action type is not default constructible"); - static_assert (std::is_default_constructible::value, "State type is not default constructible"); - - /// - /// \brief env_type - /// - typedef EnvTp env_type; - - /// - /// \brief node_type - /// - typedef NodeTp node_type; - - /// - /// \brief actions_before_training_episodes. Execute any actions the - /// algorithm needs before starting the iterations - /// - virtual void actions_before_training_episodes() override; - - /// - /// \brief actions_after_training_episodes. 
Actions to execute after - /// the training iterations have finisehd - /// - virtual void actions_after_training_episodes() override; - - /// - /// \brief step Do one step of the algorithm - /// - virtual void on_episode() = 0; - - /// - /// \brief simulate_node - /// \param node - /// - virtual typename env_type::time_step_type simulate_node(std::shared_ptr node, env_type& env)=0; - - /// - /// \brief expand_node - /// \param node - /// - virtual void expand_node(std::shared_ptr node, env_type& env)=0; - - /// - /// \brief backprop - /// - virtual void backprop(std::shared_ptr node)=0; - - /// - /// \brief max_depth_tree - /// \return - /// - uint_t max_depth_tree()const noexcept{return max_depth_tree_;} - -protected: - - /// - /// \brief MCTreeSearchBase - /// \param config - /// - MCTreeSearchBase(MCTreeSearchConfig config, env_type& env); - - /// - /// \brief itr_mix_ - /// - IterationMixin itr_mix_; - - /// - /// \brief env_ - /// - env_type& env_; - - /// - /// \brief root_ - /// - std::shared_ptr root_; - - /// - /// \brief max_depth_tree_ - /// - uint_t max_depth_tree_; - - /// - /// \brief temperature_ - /// - real_t temperature_; - -}; - -template -MCTreeSearchBase::MCTreeSearchBase(MCTreeSearchConfig config, env_type& env) - : - AlgorithmBase(config.n_episodes, config.tolerance), - itr_mix_(config.n_itrs_per_episode, config.render_episode), - env_(env), - root_(nullptr), - max_depth_tree_(config.max_tree_depth), - temperature_(config.temperature) -{} - -template -void -MCTreeSearchBase::actions_after_training_episodes(){ - - //this->AlgorithmBase::actions_after_training_episodes(); -} - -template -void -MCTreeSearchBase::actions_before_training_episodes(){ - //this->AlgorithmBase::actions_before_training_episodes(); -} - -template -void -MCTreeSearchBase::on_episode(){ - - while(itr_mix_.continue_iterations()){ - - - } -} - - -} -} -} - -#endif // MC_TREE_SEARCH_BASE_H