diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f5a8c66a3..aacd207c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,7 +68,7 @@ repos: - --blank exclude: src/optimagic/optimization/algo_options.py - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.7.2 hooks: # Run the linter. - id: ruff @@ -108,7 +108,7 @@ repos: - '88' files: (docs/.) - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.0 hooks: - id: nbstripout exclude: | diff --git a/.tools/envs/testenv-linux.yml b/.tools/envs/testenv-linux.yml index 827c56d40..1e1c0846f 100644 --- a/.tools/envs/testenv-linux.yml +++ b/.tools/envs/testenv-linux.yml @@ -8,7 +8,7 @@ dependencies: - jax - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -37,4 +37,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index ef75ece28..34681b9ba 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-others.yml b/.tools/envs/testenv-others.yml index 9b81be122..444205593 100644 --- a/.tools/envs/testenv-others.yml +++ b/.tools/envs/testenv-others.yml @@ -6,7 +6,7 @@ channels: dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -35,4 +35,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-pandas.yml b/.tools/envs/testenv-pandas.yml index d7e99f2be..ff4996dc5 100644 --- a/.tools/envs/testenv-pandas.yml +++ b/.tools/envs/testenv-pandas.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 8407ede25..1929ec914 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -27,6 +27,7 @@ dependencies: - patsy - joblib - plotly + - nlopt - annotated-types - pip: - ../ @@ -38,3 +39,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs + - fides==0.7.4 # dev, tests diff --git a/docs/source/conf.py b/docs/source/conf.py index 38a4c6bb4..bf50593c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -48,6 +48,7 @@ "sphinxcontrib.bibtex", "sphinx_panels", "sphinx_design", + "sphinxcontrib.mermaid", ] 
myst_enable_extensions = [ @@ -55,6 +56,9 @@ "dollarmath", "html_image", ] +myst_fence_as_directive = ["mermaid"] + + copybutton_prompt_text = ">>> " copybutton_only_copy_prompt_lines = False diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 1620e7225..fceb44038 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -5,26 +5,86 @@ "metadata": {}, "source": [ "(how-to-select-algorithms)=\n", + "# How to select a local optimizer\n", "\n", - "# Which optimizer to use\n", + "This guide explains how to choose a local optimizer that works well for your problem. \n", + "Depending on your [strategy for global optimization](how_to_globalization.ipynb), it \n", + "is also relevant for global optimization problems. \n", "\n", - "This is a very very very short and oversimplifying guide on selecting an optimization algorithm based on a minimum of information. \n", + "## Important facts \n", "\n", + "- There is no optimizer that works well for all problems. \n", + "- Making the right choice can lead to enormous speedups.\n", + "- Making the wrong choice can mean that you [don't solve your problem at all](algo-selection-how-important). Sometimes,\n", + "optimizers fail silently!\n", "\n", - "To select an optimizer, you need to answer two questions:\n", "\n", - "1. Is your criterion function differentiable?\n", "\n", + "## The three steps for selecting algorithms\n", "\n", - "2. Do you have a nonlinear least squares structure (i.e. do you sum some kind of squared residuals at the end of your criterion function)?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define some inputs\n", + "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", + "steps:\n", + "\n", + "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n", + "You may use the decision tree below.\n", + "2. **Experiments**: Run the candidate algorithms for a small number of function \n", + "evaluations and compare the results in a *criterion plot*. As a rule of thumb, use \n", + "between `n_params` and `10 * n_params` evaluations. \n", + "3. **Optimization**: Re-run the best-performing algorithm until \n", + "convergence. Use the best parameter vector from the experiments as start parameters.\n", + "\n", + "We will walk you through the steps in an [example](algo-selection-example-problem)\n", + "below. These steps work well for most problems, but sometimes you need \n", + "[variations](algo-selection-steps-variations).\n", + "\n", + "\n", + "## A decision tree \n", + "\n", + "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", + "\n", + "```{mermaid}\n", + "graph LR\n", + " classDef highlight fill:#FF4500;\n",
+ " A[\"Do you have<br/>nonlinear<br/>constraints?\"] -- yes --> B[\"Is your objective function<br/>differentiable?\"]\n",
+ " B -- yes --> C[\"ipopt<br/>nlopt_slsqp<br/>scipy_trust_constr\"]\n",
+ " B -- no --> D[\"scipy_cobyla<br/>nlopt_cobyla\"]\n",
+ "\n",
+ " A -- no --> E[\"Can you exploit<br/>a least-squares<br/>structure?\"]\n",
+ " E -- yes --> F[\"differentiable?\"]\n",
+ " E -- no --> G[\"differentiable?\"]\n",
+ "\n",
+ " F -- yes --> H[\"scipy_ls_lm<br/>scipy_ls_trf<br/>scipy_ls_dogbox\"]\n",
+ " F -- no --> I[\"nag_dfols<br/>pounders<br/>tao_pounders\"]\n",
+ "\n",
+ " G -- yes --> J[\"scipy_lbfgsb<br/>nlopt_lbfgsb<br/>fides\"]\n",
+ " G -- no --> K[\"nlopt_bobyqa<br/>nlopt_neldermead<br/>neldermead_parallel\"]\n",
neldermead_parallel\"]\n", "\n", - "Again, we use versions of the sphere function to illustrate how to select these algorithms in practice" + "```\n", + "\n", + "Going through the different questions will give you a list of candidate algorithms. \n", + "All algorithms in that list are designed for the same problem class but use different \n", + "approaches to solve the problem. Which of them works best for your problem can only be \n", + "found out through experimentation.\n", + "\n", + "```{note}\n", + "Many books on numerical optimization focus strongly on the inner workings of algorithms.\n", + "They will, for example, describe the difference between a trust-region algorithm and a \n", + "line-search algorithm in a lot of detail. We have an [intuitive explanation](../explanation/explanation_of_numerical_optimizers.md) of this too. Understanding these details is important for configuring and\n", + "troubleshooting optimizations, but not for algorithm selection. For example, If you have\n", + "a scalar, differentiable problem without nonlinear constraints, the decision tree \n", + "suggests `fides` and two variants of `lbfgsb`. `fides` is a trust-region algorithm, \n", + "`lbfgsb` is a line-search algorithm. Both are designed to solve the same kinds of \n", + "problems and which one works best needs to be found out through experimentation.\n", + "```\n", + "\n", + "(algo-selection-example-problem)=\n", + "\n", + "## An example problem\n", + "\n", + "As an example we use the [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The Trid function has no local minimum except \n", + "the global one. It is defined for any number of dimensions, we will pick 20. As starting \n", + "values we will pick the vector [0, 1, ..., 19]. \n", + "\n", + "A Python implementation of the function and its gradient looks like this:" ] }, { @@ -33,9 +93,9 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", + "import warnings\n", "\n", - "import optimagic as om" + "warnings.filterwarnings(\"ignore\")" ] }, { @@ -44,25 +104,57 @@ "metadata": {}, "outputs": [], "source": [ - "@om.mark.least_squares\n", - "def sphere(params):\n", - " return params\n", + "import numpy as np\n", + "\n", + "import optimagic as om\n", "\n", "\n", - "def sphere_gradient(params):\n", - " return params * 2\n", + "def trid_scalar(x):\n", + " \"\"\"Implement Trid function: https://www.sfu.ca/~ssurjano/trid.html.\"\"\"\n", + " return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()\n", "\n", "\n", - "start_params = np.arange(5)" + "def trid_gradient(x):\n", + " \"\"\"Calculate gradient of trid function.\"\"\"\n", + " l1 = np.insert(x, 0, 0)\n", + " l1 = np.delete(l1, [-1])\n", + " l2 = np.append(x, 0)\n", + " l2 = np.delete(l2, [0])\n", + " return 2 * (x - 1) - l1 - l2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Differentiable criterion function\n", + "### Step 1: Theory\n", + "\n", + "\n", + "\n", + "Let's go through the decision tree for the Trid function:\n", + "\n", + "1. **No** nonlinear constraints our solution needs to satisfy\n", + "2. **No** least-squares structure we can exploit \n", + "3. **Yes**, the function is differentiable. We even have a closed form gradient that \n", + "we would like to use. \n", + "\n", + "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", + "`fides`.\n", "\n", - "Use `scipy_lbfsgsb` as optimizer and provide the closed form derivative if you can. If you do not provide a derivative, optimagic will calculate it numerically. 
{ "cell_type": "markdown", "metadata": {}, "source": [ - "## Differentiable criterion function\n", + "### Step 1: Theory\n", + "\n", + "Let's go through the decision tree for the Trid function:\n", + "\n", + "1. **No** nonlinear constraints our solution needs to satisfy.\n", + "2. **No** least-squares structure we can exploit. \n", + "3. **Yes**, the function is differentiable. We even have a closed-form gradient that \n", + "we would like to use. \n", + "\n", + "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", + "`fides`.\n", "\n", - "Use `scipy_lbfsgsb` as optimizer and provide the closed form derivative if you can. If you do not provide a derivative, optimagic will calculate it numerically. However, this is less precise and slower. " + "```{note}\n", + "If your function is differentiable but you do not have a closed-form gradient (yet), \n", + "we suggest using at least one gradient-based optimizer and one gradient-free optimizer\n", + "in your experiments. Optimagic will use numerical gradients in that case. For details, \n", + "see [here](how_to_derivatives.ipynb).\n", + "```\n", + "\n", + "\n", + "### Step 2: Experiments\n", + "\n", + "To find out which algorithms work well for our problem, we simply run optimizations with\n", + "all candidate algorithms in a loop and store the results in a dictionary. We limit the \n", + "number of function evaluations to 8. Since some algorithms only support a maximum number\n", + "of iterations as a stopping criterion, we also limit the number of iterations to 8.\n" ] }, { @@ -71,40 +163,57 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"scipy_lbfgsb\",\n", - " jac=sphere_gradient,\n", - ")\n", - "res.n_fun_evals" + "results = {}\n", + "for algo in [\"scipy_lbfgsb\", \"nlopt_lbfgsb\", \"fides\"]:\n", + " results[algo] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", + " algorithm=algo,\n", + " algo_options={\"stopping_maxfun\": 8, \"stopping_maxiter\": 8},\n", + " )\n", + "\n", + "fig = om.criterion_plot(results, max_evaluations=8)\n", + "fig.show(renderer=\"png\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that this solves a 5 dimensional problem with just 3 criterion evaluations. For higher dimensions, you will need more, but it scales very well to dozens and hundreds of parameters. \n", + "All optimizers work pretty well here, and since this is a very simple problem, any of them \n", + "would probably find the optimum in a reasonable time. However, `nlopt_lbfgsb` is a bit \n", + "better than the others, so we will select it for the next step. In more difficult \n", + "examples, the difference between optimizers can be much more pronounced.\n", "\n", - "If you are worried about being stuck in a local optimum, use multistart optimization." + "### Step 3: Optimization \n", + "\n", + "All that is left to do is to run the best optimizer until convergence. \n", + "To avoid duplicated calculations, we can already start from the previously \n", + "best parameter vector:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Not differentiable, only scalar output" + "best_x = results[\"nlopt_lbfgsb\"].params\n", + "results[\"nlopt_lbfgsb_complete\"] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=best_x,\n", + " algorithm=\"nlopt_lbfgsb\",\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `nag_pybobyqa`. Note that for this you need to install the `PyBOBYQA` package if you do not already have it:\n", - " \n", - "`pip install Py-BOBYQA`\n", - "\n", - "Then you select the algorithm as follows:" + "Looking at the result in a criterion plot, we can see that the optimizer converges after \n", + "a bit more than 30 function evaluations. " ] }, { @@ -113,26 +222,75 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_pybobyqa\",\n", - ")\n", - "res.n_fun_evals" + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" ] },
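{ "cell_type": "markdown", "metadata": {}, "source": [ "Beyond eyeballing the plot, we can also query the result object directly, for \n", "example its `success` flag and the number of function evaluations it needed:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Inspect the result of the complete run.\n", "res = results[\"nlopt_lbfgsb_complete\"]\n", "res.success, res.n_fun_evals" ] },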
" ] }, { @@ -113,26 +222,75 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_pybobyqa\",\n", - ")\n", - "res.n_fun_evals" + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Not differentiable, least squares structure\n", + "(algo-selection-steps-variations)=\n", + "\n", + "## Variations of the four steps\n", + "\n", + "The four steps described above work very well in most situations. However, sometimes \n", + "it makes sense to deviate: \n", "\n", - "Use `nag_dfols`. To use `nag_dfols`, you need to install it via:\n", + "- If you are unsure about some of the questions in step 1, select more algorithms for \n", + "the experimentation phase and run more than 1 algorithm until convergence. \n", + "- If it is very important to find a precise optimum, run more than 1 algorithm until \n", + "convergence. \n", + "- If you have a very fast objective function, simply run all candidate algorithms until \n", + "convergence. \n", + "- If you have a differentiable objective function but no closed form derivative, use \n", + "at least one gradient based optimizer and one gradient free optimizer in the \n", + "experiments. See [here](how_to_derivatives.ipynb) to learn more about derivatives.\n", "\n", - "`pip install DFO-LS`\n", "\n", + "(algo-selection-how-important)=\n", + "\n", + "## How important was it?\n", + "\n", + "The Trid function is differentiable and very well behaved in almost every aspect. \n", + "Moreover, it has a very short runtime. One would think that any optimizer can find its \n", + "optimum. So let's compare the selected optimizer with a few others:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = {}\n", + "for algo in [\"nlopt_lbfgsb\", \"scipy_neldermead\", \"scipy_cobyla\"]:\n", + " results[algo] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", + " algorithm=algo,\n", + " )\n", + "\n", + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that our chosen optimizer solves the problem with less than 35 function \n", + "evaluations. At this point, the two gradient-free optimizers have not yet made \n", + "significant progress. CoByLA gets reasonably close to an optimum after about 4k \n", + "evaluations. Nelder-Mead gets stuck after 8k evaluations and fails to solve the problem. \n", "\n", - "This optimizer will only work if your criterion function returns a dictionary that contains the entry `root_contributions`. This needs to be a numpy array or pytree that contains the residuals of the least squares problem. " + "This example shows not only that the choice of optimizer is important but that the commonly \n", + "held belief that gradient free optimizers are generally more robust than gradient based \n", + "ones is dangerous! The Nelder-Mead algorithm did \"converge\" and reports success, but\n", + "did not find the optimum. It did not even get stuck in a local optimum because we know \n", + "that the Trid function does not have local optima except the global one. It just got \n", + "stuck somewhere. 
" ] }, { @@ -141,12 +299,7 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_dfols\",\n", - ")\n", - "res.n_fun_evals" + "results[\"scipy_neldermead\"].success" ] } ], @@ -166,7 +319,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/docs/source/how_to/how_to_globalization.ipynb b/docs/source/how_to/how_to_globalization.ipynb new file mode 100644 index 000000000..1ddf45cbc --- /dev/null +++ b/docs/source/how_to/how_to_globalization.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to choose a strategy for global optimization\n", + "\n", + "(to be written)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/how_to/index.md b/docs/source/how_to/index.md index 8b7cdd37d..412b0f252 100644 --- a/docs/source/how_to/index.md +++ b/docs/source/how_to/index.md @@ -15,6 +15,7 @@ how_to_derivatives how_to_algorithm_selection how_to_bounds how_to_constraints +how_to_globalization how_to_multistart how_to_visualize_histories how_to_specify_algorithm_and_algo_options diff --git a/environment.yml b/environment.yml index a608d5540..cfd6bf6eb 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - jupyterlab # dev, docs - - nlopt # dev, tests + - nlopt # dev, tests, docs - pdbpp # dev - pip # dev, tests, docs - pytest # dev, tests @@ -50,3 +50,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs