Skip to content

Commit

Permalink
Move recursive multivariate OLS to a separate file.
Browse files Browse the repository at this point in the history
  • Loading branch information
romanwerpachowski committed Aug 26, 2020
1 parent 1b81fdf commit 9ec1a0a
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 129 deletions.
64 changes: 2 additions & 62 deletions ML/LinearRegression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,7 @@ namespace ml
return calc_univariate_linear_regression_result(sxx, sxy, syy, 0, 0, n, false);
}

/** @brief Type of matrix decomposition used for \f$X X^T\f$. */
typedef Eigen::LDLT<Eigen::MatrixXd> XXTMatrixDecomposition;

/** @brief Calculates X*X^T, inverts it, and calculates beta. */
static Eigen::VectorXd calculate_XXt_beta(const Eigen::Ref<const Eigen::MatrixXd> X, const Eigen::Ref<const Eigen::VectorXd> y, XXTMatrixDecomposition& xxt_decomp)
Eigen::VectorXd calculate_XXt_beta(const Eigen::Ref<const Eigen::MatrixXd> X, const Eigen::Ref<const Eigen::VectorXd> y, Eigen::LDLT<Eigen::MatrixXd>& xxt_decomp)
{
// X is an q x N matrix and y is a N-size vector.
const auto n = static_cast<unsigned int>(X.cols());
Expand Down Expand Up @@ -151,7 +147,7 @@ namespace ml
// X is an q x N matrix and y is a N-size vector.
const auto q = static_cast<unsigned int>(X.rows());
const auto n = static_cast<unsigned int>(X.cols());
XXTMatrixDecomposition xxt_decomp;
Eigen::LDLT<Eigen::MatrixXd> xxt_decomp;
MultivariateOLSResult result;
result.beta = calculate_XXt_beta(X, y, xxt_decomp);
result.n = n;
Expand Down Expand Up @@ -184,61 +180,5 @@ namespace ml
X_with_intercept.bottomRows(1) = Eigen::RowVectorXd::Ones(X.cols());
return X_with_intercept;
}

RecursiveMultivariateOLS::RecursiveMultivariateOLS()
: n_(0), d_(0)
{}

RecursiveMultivariateOLS::RecursiveMultivariateOLS(const Eigen::Ref<const Eigen::MatrixXd> X, const Eigen::Ref<const Eigen::VectorXd> y)
{
initialise(X, y);
}

void RecursiveMultivariateOLS::update(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y)
{
if (!n_) {
initialise(X, y);
}
else {
const unsigned int n_i = static_cast<unsigned int>(X.cols());
if (!n_i) {
throw std::invalid_argument("No new data points");
}
if (d_ != static_cast<unsigned int>(X.rows())) {
throw std::invalid_argument("Data dimension mismatch");
}
if (X.cols() != y.size()) {
throw std::invalid_argument("X matrix has different number of data points than Y has values");
}
// Update P.
K_.noalias() = P_ * X;
assert(K_.rows() == d_);
assert(K_.cols() == n_i);
W_.noalias() = X.transpose() * K_;
assert(W_.rows() == n_i);
assert(W_.cols() == n_i);
W_ += Eigen::MatrixXd::Identity(n_i, n_i);
helper_decomp_.compute(W_);
V_ = helper_decomp_.solve(K_.transpose());
assert(V_.rows() == n_i);
assert(V_.cols() == d_);
P_.noalias() -= K_ * V_;
// Update beta.
K_.noalias() = P_ * X;
assert(K_.rows() == d_);
assert(K_.cols() == n_i);
residuals_ = y - X.transpose() * beta_;
beta_ += K_ * residuals_;
n_ += n_i;
}
}

void RecursiveMultivariateOLS::initialise(const Eigen::Ref<const Eigen::MatrixXd> X, const Eigen::Ref<const Eigen::VectorXd> y)
{
beta_ = calculate_XXt_beta(X, y, helper_decomp_);
d_ = static_cast<unsigned int>(X.rows());
n_ = static_cast<unsigned int>(X.cols());
P_ = helper_decomp_.solve(Eigen::MatrixXd::Identity(d_, d_));
}
}
}
71 changes: 4 additions & 67 deletions ML/LinearRegression.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once
#include <string>
#include <Eigen/Cholesky>
#include <Eigen/Core>
#include "dll.hpp"

Expand Down Expand Up @@ -118,73 +117,11 @@ namespace ml
@return New matrix with a row filled with 1's added at the end.
@throw std::invalid_argument If `X.cols() == 0`.
*/
DLL_DECLSPEC Eigen::MatrixXd add_ones(Eigen::Ref<const Eigen::MatrixXd> X);
DLL_DECLSPEC Eigen::MatrixXd add_ones(Eigen::Ref<const Eigen::MatrixXd> X);

/** @brief Recursive multivariate Ordinary Least Squares.
Given a stream of pairs \f$(X_i, \vec{y}_i)\f$, updates the least-squares estimate for \f$\vec{\beta}\f$ using the model
\f$ \vec{y}_0 = X_0^T \vec{\beta} + \vec{e}_0 \f$
\f$ \vec{y}_1 = X_1^T \vec{\beta} + \vec{e}_1 \f$
...
where \f$\vec{e}_i\f$ are i.i.d. Gaussian.
Based on https://cpb-us-w2.wpmucdn.com/sites.gatech.edu/dist/2/436/files/2017/07/22-notes-6250-f16.pdf
/** @brief Calculates X*X^T, inverts it, and calculates beta.
@private Shared between multiple linear regression algorithms.
*/
class RecursiveMultivariateOLS
{
public:
/** @brief Initialises without data. */
DLL_DECLSPEC RecursiveMultivariateOLS();

/** @brief Initialises with the first sample and calculates the first beta estimate.
@param[in] X D x N matrix of X values, with data points in columns.
@param[in] y Y vector with length N.
@throw std::invalid_argument If `y.size() != X.cols()` or `X.cols() < X.rows()`.
*/
DLL_DECLSPEC RecursiveMultivariateOLS(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y);

/** @brief Updates the beta estimate with a new sample.
@param[in] X D x N matrix of X values, with data points in columns.
@param[in] y Y vector with length N.
@throw std::invalid_argument If `(X, y)` is the first sample (i.e., `n() == 0`) and `X.cols() < X.rows()`, or `y.size() != X.cols()`.
*/
DLL_DECLSPEC void update(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y);

/** @brief Returns the number of data points seen so far. */
unsigned int n() const
{
return n_;
}

/** @brief Returns the dimension of data points. If n() == 0, returs 0. */
unsigned int d() const
{
return d_;
}

/** @brief Returns the current estimate of beta. If n() == 0, returns an empty vector. */
const Eigen::VectorXd& beta() const
{
return beta_;
}
private:
Eigen::LDLT<Eigen::MatrixXd> helper_decomp_; /**< N_i x N_i decomposition. */
Eigen::MatrixXd P_; /**< D x D information matrix, equal to (X_1 * X_1^T + X_2 * X_2 + ...)^-1. */
Eigen::MatrixXd K_; /**< D x N_i helper matrix. */
Eigen::MatrixXd W_; /**< N_i x N_i helper matrix. */
Eigen::MatrixXd V_; /**< N_i x D helper matrix. */
Eigen::VectorXd beta_; /**< Current estimate of beta. */
Eigen::VectorXd residuals_; /**< Helper vector w/ size N_i. */
unsigned int n_; /**< Number of data points seen so far. */
unsigned int d_; /**< Dimension of each x data point. */

/// Initialise recursive OLS.
void initialise(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y);
};
Eigen::VectorXd calculate_XXt_beta(const Eigen::Ref<const Eigen::MatrixXd> X, const Eigen::Ref<const Eigen::VectorXd> y, Eigen::LDLT<Eigen::MatrixXd>& xxt_decomp);
}
}
2 changes: 2 additions & 0 deletions ML/ML.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
<ClInclude Include="dll.hpp" />
<ClInclude Include="EM.hpp" />
<ClInclude Include="LinearRegression.hpp" />
<ClInclude Include="RecursiveMultivariateOLS.hpp" />
<ClInclude Include="Statistics.hpp" />
<ClInclude Include="Version.hpp" />
</ItemGroup>
Expand All @@ -177,6 +178,7 @@
<ClCompile Include="DecisionTrees.cpp" />
<ClCompile Include="EM.cpp" />
<ClCompile Include="LinearRegression.cpp" />
<ClCompile Include="RecursiveMultivariateOLS.cpp" />
<ClCompile Include="Statistics.cpp" />
</ItemGroup>
<ItemGroup>
Expand Down
6 changes: 6 additions & 0 deletions ML/ML.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@
<ClInclude Include="Version.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="RecursiveMultivariateOLS.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="EM.cpp">
Expand All @@ -65,6 +68,9 @@
<ClCompile Include="Statistics.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="RecursiveMultivariateOLS.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="SConscript" />
Expand Down
1 change: 1 addition & 0 deletions Tests/test_LinearRegression.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <random>
#include <gtest/gtest.h>
#include "ML/LinearRegression.hpp"
#include "ML/RecursiveMultivariateOLS.hpp"
#include "ML/Statistics.hpp"

using namespace ml::LinearRegression;
Expand Down
1 change: 1 addition & 0 deletions cppyml/linear_regression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <pybind11/pybind11.h>
#include <pybind11/eigen.h>
#include "ML/LinearRegression.hpp"
#include "ML/RecursiveMultivariateOLS.hpp"
#include "types.hpp"


Expand Down

0 comments on commit 9ec1a0a

Please sign in to comment.