Skip to content

Commit

Permalink
Units compatibility for optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
NAThompson committed Feb 11, 2024
1 parent 3bf7d97 commit 0ae806f
Show file tree
Hide file tree
Showing 10 changed files with 187 additions and 78 deletions.
105 changes: 56 additions & 49 deletions include/boost/math/optimization/cma_es.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ namespace boost::math::optimization {

template <typename ArgumentContainer> struct cma_es_parameters {
using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
ArgumentContainer lower_bounds;
ArgumentContainer upper_bounds;
size_t max_generations = 1000;
Expand All @@ -51,12 +52,13 @@ template <typename ArgumentContainer> struct cma_es_parameters {
// and rounded up to the nearest multiple of threads:
size_t population_size = 0;
// In the reference, learning_rate = c_m:
Real learning_rate = 1;
DimensionlessReal learning_rate = 1;
};

template <typename ArgumentContainer>
void validate_cma_es_parameters(cma_es_parameters<ArgumentContainer> &params) {
using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
using std::isfinite;
using std::isnan;
using std::log;
Expand All @@ -77,7 +79,7 @@ void validate_cma_es_parameters(cma_es_parameters<ArgumentContainer> &params) {
//params.population_size = k*params.threads;
params.population_size = static_cast<size_t>(4 + floor(3*log(n)));
}
if (params.learning_rate <= Real(0) || !std::isfinite(params.learning_rate)) {
if (params.learning_rate <= DimensionlessReal(0) || !isfinite(params.learning_rate)) {
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": The learning rate must be > 0, but got " << params.learning_rate << ".";
throw std::invalid_argument(oss.str());
Expand All @@ -94,8 +96,8 @@ ArgumentContainer cma_es(
std::atomic<std::invoke_result_t<Func, ArgumentContainer>> *current_minimum_cost = nullptr,
std::vector<std::pair<ArgumentContainer, std::invoke_result_t<Func, ArgumentContainer>>> *queries = nullptr)
{

using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
using std::abs;
using std::log;
Expand All @@ -115,81 +117,81 @@ ArgumentContainer cma_es(
std::atomic<ResultType> lowest_cost = std::numeric_limits<ResultType>::infinity();
ArgumentContainer best_vector;
// p_{c} := evolution path, equation (24) of the arxiv review:
Eigen::Vector<Real, Eigen::Dynamic> p_c(n);
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_c(n);
// p_{\sigma} := conjugate evolution path, equation (31) of the arxiv review:
Eigen::Vector<Real, Eigen::Dynamic> p_sigma(n);
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_sigma(n);
if constexpr (detail::has_resize_v<ArgumentContainer>) {
best_vector.resize(n, std::numeric_limits<Real>::quiet_NaN());
}
for (size_t i = 0; i < n; ++i) {
p_c[i] = Real(0);
p_sigma[i] = Real(0);
p_c[i] = DimensionlessReal(0);
p_sigma[i] = DimensionlessReal(0);
}
// Table 1, \mu = floor(\lambda/2):
size_t mu = params.population_size/2;
std::vector<Real> w_prime(params.population_size, std::numeric_limits<Real>::quiet_NaN());
std::vector<DimensionlessReal> w_prime(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
for (size_t i = 0; i < params.population_size; ++i) {
// Equation (49), but 0-indexed:
w_prime[i] = log(static_cast<Real>(params.population_size + 1)/(2*(i+1)));
w_prime[i] = log(static_cast<DimensionlessReal>(params.population_size + 1)/(2*(i+1)));
}
// Table 1, notes at top:
Real positive_weight_sum = 0;
Real sq_weight_sum = 0;
DimensionlessReal positive_weight_sum = 0;
DimensionlessReal sq_weight_sum = 0;
for (size_t i = 0; i < mu; ++i) {
BOOST_MATH_ASSERT(w_prime[i] > 0);
positive_weight_sum += w_prime[i];
sq_weight_sum += w_prime[i]*w_prime[i];
}
Real mu_eff = positive_weight_sum*positive_weight_sum/sq_weight_sum;
DimensionlessReal mu_eff = positive_weight_sum*positive_weight_sum/sq_weight_sum;
BOOST_MATH_ASSERT(1 <= mu_eff);
BOOST_MATH_ASSERT(mu_eff <= mu);
Real negative_weight_sum = 0;
DimensionlessReal negative_weight_sum = 0;
sq_weight_sum = 0;
for (size_t i = mu; i < params.population_size; ++i) {
BOOST_MATH_ASSERT(w_prime[i] <= 0);
negative_weight_sum += w_prime[i];
sq_weight_sum += w_prime[i]*w_prime[i];
}
Real mu_eff_m = negative_weight_sum*negative_weight_sum/sq_weight_sum;
DimensionlessReal mu_eff_m = negative_weight_sum*negative_weight_sum/sq_weight_sum;
// Equation (54):
Real c_m = params.learning_rate;
DimensionlessReal c_m = params.learning_rate;
// Equation (55):
Real c_sigma = (mu_eff + 2)/(n + mu_eff + 5);
DimensionlessReal c_sigma = (mu_eff + 2)/(n + mu_eff + 5);
BOOST_MATH_ASSERT(c_sigma < 1);
Real d_sigma = 1 + 2*(max)(Real(0), sqrt((mu_eff - 1)/(n + 1)) - 1) + c_sigma;
DimensionlessReal d_sigma = 1 + 2*(max)(DimensionlessReal(0), sqrt(DimensionlessReal((mu_eff - 1)/(n + 1))) - DimensionlessReal(1)) + c_sigma;
// Equation (56):
Real c_c = (4 + mu_eff/n)/(n + 4 + 2*mu_eff/n);
DimensionlessReal c_c = (4 + mu_eff/n)/(n + 4 + 2*mu_eff/n);
BOOST_MATH_ASSERT(c_c <= 1);
// Equation (57):
Real c_1 = Real(2)/(pow(n + 1.3, 2) + mu_eff);
DimensionlessReal c_1 = DimensionlessReal(2)/(pow(n + 1.3, 2) + mu_eff);
// Equation (58)
Real c_mu = (min)(1 - c_1, 2*(Real(0.25) + mu_eff + 1/mu_eff - 2)/((n+2)*(n+2) + mu_eff));
BOOST_MATH_ASSERT(c_1 + c_mu <= Real(1));
DimensionlessReal c_mu = (min)(1 - c_1, 2*(DimensionlessReal(0.25) + mu_eff + 1/mu_eff - 2)/((n+2)*(n+2) + mu_eff));
BOOST_MATH_ASSERT(c_1 + c_mu <= DimensionlessReal(1));
// Equation (50):
Real alpha_mu_m = 1 + c_1/c_mu;
DimensionlessReal alpha_mu_m = 1 + c_1/c_mu;
// Equation (51):
Real alpha_mu_eff_m = 1 + 2*mu_eff_m/(mu_eff + 2);
DimensionlessReal alpha_mu_eff_m = 1 + 2*mu_eff_m/(mu_eff + 2);
// Equation (52):
Real alpha_m_pos_def = (1- c_1 - c_mu)/(n*c_mu);
DimensionlessReal alpha_m_pos_def = (1- c_1 - c_mu)/(n*c_mu);
// Equation (53):
std::vector<Real> weights(params.population_size, std::numeric_limits<Real>::quiet_NaN());
std::vector<DimensionlessReal> weights(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
for (size_t i = 0; i < mu; ++i) {
weights[i] = w_prime[i]/positive_weight_sum;
}
Real min_alpha = (min)(alpha_mu_m, (min)(alpha_mu_eff_m, alpha_m_pos_def));
DimensionlessReal min_alpha = (min)(alpha_mu_m, (min)(alpha_mu_eff_m, alpha_m_pos_def));
for (size_t i = mu; i < params.population_size; ++i) {
weights[i] = min_alpha*w_prime[i]/abs(negative_weight_sum);
}
// mu:= number of parents, lambda := number of offspring.
Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic> C = Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>::Identity(n, n);
Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> C = Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>::Identity(n, n);
ArgumentContainer mean_vector;
Real sigma = 1;
// See the footnote in Figure 6 of the arxiv review:
// We should consider the more robust initialization described there. . .
Real sigma = DimensionlessReal(0.3)*(params.upper_bounds[0] - params.lower_bounds[0]);;
if (params.initial_guess) {
mean_vector = *params.initial_guess;
}
else {
// See the footnote in Table 1 of the arxiv review:
sigma = Real(0.3)*(params.upper_bounds[0] - params.lower_bounds[0]);
mean_vector = detail::random_initial_population(params.lower_bounds, params.upper_bounds, 1, gen)[0];
}
auto initial_cost = cost_function(mean_vector);
Expand Down Expand Up @@ -221,11 +223,11 @@ ArgumentContainer cma_es(
#endif
size_t generation = 0;

std::vector<Eigen::Vector<Real, Eigen::Dynamic>> ys(params.population_size);
std::vector<Eigen::Vector<DimensionlessReal, Eigen::Dynamic>> ys(params.population_size);
std::vector<ArgumentContainer> xs(params.population_size);
std::vector<ResultType> costs(params.population_size, std::numeric_limits<ResultType>::quiet_NaN());
Eigen::Vector<Real, Eigen::Dynamic> weighted_avg_y(n);
Eigen::Vector<Real, Eigen::Dynamic> z(n);
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> weighted_avg_y(n);
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> z(n);
if constexpr (detail::has_resize_v<ArgumentContainer>) {
for (auto & x : xs) {
x.resize(n, std::numeric_limits<Real>::quiet_NaN());
Expand All @@ -234,7 +236,7 @@ ArgumentContainer cma_es(
for (auto & y : ys) {
y.resize(n);
}
normal_distribution<Real> dis(Real(0), Real(1));
normal_distribution<DimensionlessReal> dis(DimensionlessReal(0), DimensionlessReal(1));
do {
if (cancellation && *cancellation) {
break;
Expand All @@ -243,14 +245,16 @@ ArgumentContainer cma_es(
// Section B.2 "Strategy internal numerical effort":
// "In practice, the re-calculation of B and D needs to be done not until about
// max(1, floor(1/(10n(c_1+c_mu)))) generations."
Eigen::SelfAdjointEigenSolver<Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic>> eigensolver(C);
// Note that sigma can be dimensionless, in which case C carries the units.
// This is a weird decision-we're not gonna do that!
Eigen::SelfAdjointEigenSolver<Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>> eigensolver(C);
if (eigensolver.info() != Eigen::Success) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": Could not decompose the covariance matrix as BDB^{T}.";
throw std::logic_error(oss.str());
}
Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic> B = eigensolver.eigenvectors();
Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> B = eigensolver.eigenvectors();
// Eigen returns D^2, in the notation of the survey:
auto D = eigensolver.eigenvalues();
// So make it better:
Expand Down Expand Up @@ -278,7 +282,10 @@ ArgumentContainer cma_es(
for (size_t i = 0; i < n; ++i) {
z[i] = dis(gen);
}
Eigen::Vector<Real, Eigen::Dynamic> Dz = D.array()*z.array();
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> Dz(n);
for (size_t i = 0; i < n; ++i) {
Dz[i] = D[i]*z[i];
}
y = B*Dz;
for (size_t i = 0; i < n; ++i) {
BOOST_MATH_ASSERT(!isnan(mean_vector[i]));
Expand Down Expand Up @@ -331,37 +338,37 @@ ArgumentContainer cma_es(
mean_vector[j] = mean_vector[j] + c_m*sigma*weighted_avg_y[j];
}
// Equation (43), Figure 6: Start with C^{-1/2}<y>_{w}
Eigen::Vector<Real, Eigen::Dynamic> inv_D_B_transpose_y = B.transpose()*weighted_avg_y;
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> inv_D_B_transpose_y = B.transpose()*weighted_avg_y;
for (size_t j = 0; j < inv_D_B_transpose_y.size(); ++j) {
inv_D_B_transpose_y[j] /= D[j];
}
Eigen::Vector<Real, Eigen::Dynamic> C_inv_sqrt_y_avg = B*inv_D_B_transpose_y;
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> C_inv_sqrt_y_avg = B*inv_D_B_transpose_y;
// Equation (43), Figure 6:
Real p_sigma_norm = 0;
DimensionlessReal p_sigma_norm = 0;
for (size_t j = 0; j < n; ++j) {
p_sigma[j] = (1-c_sigma)*p_sigma[j] + sqrt(c_sigma*(2-c_sigma)*mu_eff)*C_inv_sqrt_y_avg[j];
p_sigma_norm += p_sigma[j]*p_sigma[j];
}
p_sigma_norm = sqrt(p_sigma_norm);
// A: Algorithm Summary: E[||N(0,1)||]:
const Real expectation_norm_0I = sqrt(static_cast<Real>(n))*(Real(1) - Real(1)/(4*n) + Real(1)/(21*n*n));
const DimensionlessReal expectation_norm_0I = sqrt(static_cast<DimensionlessReal>(n))*(DimensionlessReal(1) - DimensionlessReal(1)/(4*n) + DimensionlessReal(1)/(21*n*n));
// Equation (44), Figure 6:
sigma = sigma*exp(c_sigma*(p_sigma_norm/expectation_norm_0I -1)/d_sigma);
// A: Algorithm Summary:
Real h_sigma = 0;
Real rhs = (Real(1.4) + Real(2)/(n+1))*expectation_norm_0I*sqrt(1 - pow(1-c_sigma, 2*(generation+1)));
DimensionlessReal h_sigma = 0;
DimensionlessReal rhs = (DimensionlessReal(1.4) + DimensionlessReal(2)/(n+1))*expectation_norm_0I*sqrt(1 - pow(1-c_sigma, 2*(generation+1)));
if (p_sigma_norm < rhs) {
h_sigma = 1;
}
// Equation (45), Figure 6:
p_c = (1-c_c)*p_c + h_sigma*sqrt(c_c*(2-c_c)*mu_eff)*weighted_avg_y;
Real delta_h_sigma = (1-h_sigma)*c_c*(2-c_c);
Real weight_sum = 0;
DimensionlessReal delta_h_sigma = (1-h_sigma)*c_c*(2-c_c);
DimensionlessReal weight_sum = 0;
for (auto & w : weights) {
weight_sum += w;
}
// Equation (47), Figure 6:
Real K = (1 + c_1*delta_h_sigma - c_1 - c_mu*weight_sum);
DimensionlessReal K = (1 + c_1*delta_h_sigma - c_1 - c_mu*weight_sum);
// Can these operations be sped up using `.selfadjointView<Eigen::Upper>`?
// Maybe: A.selfadjointView<Eigen::Lower>().rankUpdate(p_c, c_1);?
C = K*C + c_1*p_c*p_c.transpose();
Expand All @@ -370,12 +377,12 @@ ArgumentContainer cma_es(
C += c_mu*weights[i]*ys[indices[i]]*ys[indices[i]].transpose();
}
for (size_t i = params.population_size/2; i < params.population_size; ++i) {
Eigen::Vector<Real, Eigen::Dynamic> D_inv_BTy = B.transpose()*ys[indices[i]];
Eigen::Vector<DimensionlessReal, Eigen::Dynamic> D_inv_BTy = B.transpose()*ys[indices[i]];
for (size_t j = 0; j < n; ++j) {
D_inv_BTy[j] /= D[j];
}
Real squared_norm = D_inv_BTy.squaredNorm();
Real K2 = c_mu*weights[i]/squared_norm;
DimensionlessReal squared_norm = D_inv_BTy.squaredNorm();
DimensionlessReal K2 = c_mu*weights[i]/squared_norm;
C += K2*ys[indices[i]]*ys[indices[i]].transpose();
}
} while (generation++ < params.max_generations);
Expand Down
6 changes: 4 additions & 2 deletions include/boost/math/optimization/detail/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ std::vector<ArgumentContainer> random_initial_population(ArgumentContainer const
ArgumentContainer const &upper_bounds,
size_t initial_population_size, URBG &&gen) {
using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
constexpr bool has_resize = detail::has_resize_v<ArgumentContainer>;
std::vector<ArgumentContainer> population(initial_population_size);
auto const dimension = lower_bounds.size();
Expand Down Expand Up @@ -98,7 +99,7 @@ std::vector<ArgumentContainer> random_initial_population(ArgumentContainer const
// That said, scipy uses Latin Hypercube sampling and says self-avoiding sequences are preferable.
// So this is something that could be investigated and potentially improved.
using std::uniform_real_distribution;
uniform_real_distribution<Real> dis(Real(0), Real(1));
uniform_real_distribution<DimensionlessReal> dis(DimensionlessReal(0), DimensionlessReal(1));
for (size_t i = 0; i < population.size(); ++i) {
for (size_t j = 0; j < dimension; ++j) {
auto const &lb = lower_bounds[j];
Expand Down Expand Up @@ -163,6 +164,7 @@ template <typename Real> std::vector<size_t> best_indices(std::vector<Real> cons

template<typename RandomAccessContainer>
auto weighted_lehmer_mean(RandomAccessContainer const & values, RandomAccessContainer const & weights) {
using std::isfinite;
if (values.size() != weights.size()) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
Expand All @@ -179,7 +181,7 @@ auto weighted_lehmer_mean(RandomAccessContainer const & values, RandomAccessCont
Real numerator = 0;
Real denominator = 0;
for (size_t i = 0; i < values.size(); ++i) {
if (weights[i] < 0 || !std::isfinite(weights[i])) {
if (weights[i] < 0 || !isfinite(weights[i])) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": All weights must be positive and finite, but got received weight " << weights[i] << " at index " << i << " of " << weights.size() << ".";
Expand Down
8 changes: 5 additions & 3 deletions include/boost/math/optimization/differential_evolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ namespace boost::math::optimization {
// We provide the parameters in a struct-there are too many of them and they are too unwieldy to pass individually:
template <typename ArgumentContainer> struct differential_evolution_parameters {
using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
ArgumentContainer lower_bounds;
ArgumentContainer upper_bounds;
// mutation factor is also called scale factor or just F in the literature:
Real mutation_factor = static_cast<Real>(0.65);
double crossover_probability = 0.5;
DimensionlessReal mutation_factor = static_cast<DimensionlessReal>(0.65);
DimensionlessReal crossover_probability = static_cast<DimensionlessReal>(0.5);
// Population in each generation:
size_t NP = 500;
size_t max_generations = 1000;
Expand Down Expand Up @@ -94,6 +95,7 @@ ArgumentContainer differential_evolution(
std::vector<std::pair<ArgumentContainer, std::invoke_result_t<Func, ArgumentContainer>>> *queries = nullptr,
std::atomic<std::invoke_result_t<Func, ArgumentContainer>> *current_minimum_cost = nullptr) {
using Real = typename ArgumentContainer::value_type;
using DimensionlessReal = decltype(Real()/Real());
using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
using std::clamp;
using std::isnan;
Expand Down Expand Up @@ -159,7 +161,7 @@ ArgumentContainer differential_evolution(
// I first tried seeding thread-local random number generators with the global generator,
// but ThreadSanitizer didn't like it. I *suspect* there's a way around this, but
// even if it's formally threadsafe, there's still a lot of effort to get it computationally reproducible.
uniform_real_distribution<Real> unif01(Real(0), Real(1));
uniform_real_distribution<DimensionlessReal> unif01(DimensionlessReal(0), DimensionlessReal(1));
for (size_t i = 0; i < NP; ++i) {
size_t r1, r2, r3;
do {
Expand Down
Loading

0 comments on commit 0ae806f

Please sign in to comment.