From 4dca0bc484d0f6a9ce6125e029f955ec57c1b361 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Mon, 28 Oct 2024 12:41:47 +0100 Subject: [PATCH 1/3] Allow for AlgorithmType in estimation functions (#535) --- src/estimagic/estimate_ml.py | 9 ++++-- src/estimagic/estimate_msm.py | 9 ++++-- src/optimagic/shared/check_option_dicts.py | 2 +- tests/estimagic/test_estimate_ml.py | 29 ++++++++++++++++++ tests/estimagic/test_estimate_msm.py | 35 ++++++++++++++++++++++ 5 files changed, 77 insertions(+), 7 deletions(-) diff --git a/src/estimagic/estimate_ml.py b/src/estimagic/estimate_ml.py index 39383c6a4..4044fee1d 100644 --- a/src/estimagic/estimate_ml.py +++ b/src/estimagic/estimate_ml.py @@ -95,8 +95,8 @@ def estimate_ml( optimize_options to False. Pytrees can be a numpy array, a pandas Series, a DataFrame with "value" column, a float and any kind of (nested) dictionary or list containing these elements. See :ref:`params` for examples. - optimize_options (dict, str or False): Keyword arguments that govern the - numerical optimization. Valid entries are all arguments of + optimize_options (dict, Algorithm, str or False): Keyword arguments that govern + the numerical optimization. Valid entries are all arguments of :func:`~estimagic.optimization.optimize.minimize` except for those that are passed explicilty to ``estimate_ml``. If you pass False as optimize_options you signal that ``params`` are already the optimal parameters and no @@ -199,7 +199,10 @@ def estimate_ml( is_optimized = optimize_options is False if not is_optimized: - if isinstance(optimize_options, str): + # If optimize_options is not a dictionary and not False, we assume it represents + # an algorithm. The actual testing of whether it is a valid algorithm is done + # when `maximize` is called. + if not isinstance(optimize_options, dict): optimize_options = {"algorithm": optimize_options} check_optimization_options( diff --git a/src/estimagic/estimate_msm.py b/src/estimagic/estimate_msm.py index 57430bb99..b207e9829 100644 --- a/src/estimagic/estimate_msm.py +++ b/src/estimagic/estimate_msm.py @@ -107,8 +107,8 @@ def estimate_msm( optimize_options to False. Pytrees can be a numpy array, a pandas Series, a DataFrame with "value" column, a float and any kind of (nested) dictionary or list containing these elements. See :ref:`params` for examples. - optimize_options (dict, str or False): Keyword arguments that govern the - numerical optimization. Valid entries are all arguments of + optimize_options (dict, Algorithm, str or False): Keyword arguments that govern + the numerical optimization. Valid entries are all arguments of :func:`~estimagic.optimization.optimize.minimize` except for those that can be passed explicitly to ``estimate_msm``. If you pass False as ``optimize_options`` you signal that ``params`` are already @@ -199,7 +199,10 @@ def estimate_msm( is_optimized = optimize_options is False if not is_optimized: - if isinstance(optimize_options, str): + # If optimize_options is not a dictionary and not False, we assume it represents + # an algorithm. The actual testing of whether it is a valid algorithm is done + # when `minimize` is called. 
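+        # For example (mirroring the new tests below), all of the following are
+        # accepted here: the string "scipy_lbfgsb", the algorithm class
+        # scipy_optimizers.ScipyLBFGSB, or a configured instance such as
+        # scipy_optimizers.ScipyLBFGSB(stopping_maxfun=10).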
+ if not isinstance(optimize_options, dict): optimize_options = {"algorithm": optimize_options} check_optimization_options( diff --git a/src/optimagic/shared/check_option_dicts.py b/src/optimagic/shared/check_option_dicts.py index 82ace0201..c4c45fcc7 100644 --- a/src/optimagic/shared/check_option_dicts.py +++ b/src/optimagic/shared/check_option_dicts.py @@ -41,6 +41,6 @@ def check_optimization_options(options, usage, algorithm_mandatory=True): msg = ( "The following are not valid entries of optimize_options because they are " "not only relevant for minimization but also for inference: " - "{invalid_general}" + f"{invalid_general}" ) raise ValueError(msg) diff --git a/tests/estimagic/test_estimate_ml.py b/tests/estimagic/test_estimate_ml.py index 01714bee8..f3e806311 100644 --- a/tests/estimagic/test_estimate_ml.py +++ b/tests/estimagic/test_estimate_ml.py @@ -18,6 +18,7 @@ scalar_logit_fun_and_jac, ) from optimagic import mark +from optimagic.optimizers import scipy_optimizers from optimagic.parameters.bounds import Bounds @@ -349,6 +350,34 @@ def test_estimate_ml_optimize_options_false(fitted_logit_model, logit_np_inputs) aaae(got.cov(method="jacobian"), fitted_logit_model.covjac, decimal=4) +def test_estimate_ml_algorithm_type(logit_np_inputs): + """Test that estimate_ml computes correct covariances given correct params.""" + kwargs = {"y": logit_np_inputs["y"], "x": logit_np_inputs["x"]} + + params = pd.DataFrame({"value": logit_np_inputs["params"]}) + + estimate_ml( + loglike=logit_loglike, + params=params, + loglike_kwargs=kwargs, + optimize_options=scipy_optimizers.ScipyLBFGSB, + ) + + +def test_estimate_ml_algorithm(logit_np_inputs): + """Test that estimate_ml computes correct covariances given correct params.""" + kwargs = {"y": logit_np_inputs["y"], "x": logit_np_inputs["x"]} + + params = pd.DataFrame({"value": logit_np_inputs["params"]}) + + estimate_ml( + loglike=logit_loglike, + params=params, + loglike_kwargs=kwargs, + optimize_options=scipy_optimizers.ScipyLBFGSB(stopping_maxfun=10), + ) + + # ====================================================================================== # Univariate normal case using dict params # ====================================================================================== diff --git a/tests/estimagic/test_estimate_msm.py b/tests/estimagic/test_estimate_msm.py index 0c7a2d775..2684513d9 100644 --- a/tests/estimagic/test_estimate_msm.py +++ b/tests/estimagic/test_estimate_msm.py @@ -10,6 +10,7 @@ from estimagic.estimate_msm import estimate_msm from optimagic.optimization.optimize_result import OptimizeResult +from optimagic.optimizers import scipy_optimizers from optimagic.shared.check_option_dicts import ( check_optimization_options, ) @@ -161,6 +162,40 @@ def test_estimate_msm_with_jacobian(): aaae(calculated.cov(), cov_np) +def test_estimate_msm_with_algorithm_type(): + start_params = np.array([3, 2, 1]) + expected_params = np.zeros(3) + empirical_moments = _sim_np(expected_params) + if isinstance(empirical_moments, dict): + empirical_moments = empirical_moments["simulated_moments"] + + estimate_msm( + simulate_moments=_sim_np, + empirical_moments=empirical_moments, + moments_cov=cov_np, + params=start_params, + optimize_options=scipy_optimizers.ScipyLBFGSB, + jacobian=lambda x: np.eye(len(x)), + ) + + +def test_estimate_msm_with_algorithm(): + start_params = np.array([3, 2, 1]) + expected_params = np.zeros(3) + empirical_moments = _sim_np(expected_params) + if isinstance(empirical_moments, dict): + empirical_moments = 
empirical_moments["simulated_moments"] + + estimate_msm( + simulate_moments=_sim_np, + empirical_moments=empirical_moments, + moments_cov=cov_np, + params=start_params, + optimize_options=scipy_optimizers.ScipyLBFGSB(stopping_maxfun=10), + jacobian=lambda x: np.eye(len(x)), + ) + + def test_to_pickle(tmp_path): start_params = np.array([3, 2, 1]) From cc049d1501d530eac59299735f8b2442acc92771 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Thu, 31 Oct 2024 10:32:35 +0100 Subject: [PATCH 2/3] Fix results processing of nag optimizers (#544) --- .tools/envs/testenv-linux.yml | 2 +- .tools/envs/testenv-numpy.yml | 2 +- .tools/envs/testenv-others.yml | 2 +- .tools/envs/testenv-pandas.yml | 2 +- docs/source/algorithms.md | 2 ++ docs/source/installation.md | 4 ++++ environment.yml | 2 +- src/optimagic/optimizers/nag_optimizers.py | 9 ++++----- tests/optimagic/optimization/test_many_algorithms.py | 12 ++++++++++++ 9 files changed, 27 insertions(+), 10 deletions(-) diff --git a/.tools/envs/testenv-linux.yml b/.tools/envs/testenv-linux.yml index f80b48eb8..827c56d40 100644 --- a/.tools/envs/testenv-linux.yml +++ b/.tools/envs/testenv-linux.yml @@ -28,7 +28,7 @@ dependencies: - jinja2 # dev, tests - annotated-types # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests + - DFO-LS>=1.5.3 # dev, tests - Py-BOBYQA # dev, tests - fides==0.7.4 # dev, tests - kaleido # dev, tests diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index f54e96c17..ef75ece28 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -26,7 +26,7 @@ dependencies: - jinja2 # dev, tests - annotated-types # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests + - DFO-LS>=1.5.3 # dev, tests - Py-BOBYQA # dev, tests - fides==0.7.4 # dev, tests - kaleido # dev, tests diff --git a/.tools/envs/testenv-others.yml b/.tools/envs/testenv-others.yml index 0fc410f86..9b81be122 100644 --- a/.tools/envs/testenv-others.yml +++ b/.tools/envs/testenv-others.yml @@ -26,7 +26,7 @@ dependencies: - jinja2 # dev, tests - annotated-types # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests + - DFO-LS>=1.5.3 # dev, tests - Py-BOBYQA # dev, tests - fides==0.7.4 # dev, tests - kaleido # dev, tests diff --git a/.tools/envs/testenv-pandas.yml b/.tools/envs/testenv-pandas.yml index 6918bafae..d7e99f2be 100644 --- a/.tools/envs/testenv-pandas.yml +++ b/.tools/envs/testenv-pandas.yml @@ -26,7 +26,7 @@ dependencies: - jinja2 # dev, tests - annotated-types # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests + - DFO-LS>=1.5.3 # dev, tests - Py-BOBYQA # dev, tests - fides==0.7.4 # dev, tests - kaleido # dev, tests diff --git a/docs/source/algorithms.md b/docs/source/algorithms.md index 363ce5faf..006e25402 100644 --- a/docs/source/algorithms.md +++ b/docs/source/algorithms.md @@ -1083,6 +1083,8 @@ install each of them separately: ```{eval-rst} .. dropdown:: nag_dfols + *Note*: We recommend to install `DFO-LS` version 1.5.3 or higher. Versions of 1.5.0 or lower also work but the versions `1.5.1` and `1.5.2` contain bugs that can lead to errors being raised. + .. code-block:: "nag_dfols" diff --git a/docs/source/installation.md b/docs/source/installation.md index 1c4ea3ac5..912e213b6 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -40,6 +40,10 @@ pip install Py-BOBYQA pip install DFO-LS ``` +*Note*: We recommend to install `DFO-LS` version 1.5.3 or higher. 
Versions of 1.5.0 or +lower also work but the versions `1.5.1` and `1.5.2` contain bugs that can lead to +errors being raised. + ``` conda install -c conda-forge petsc4py ``` diff --git a/environment.yml b/environment.yml index 562d1c9cd..a608d5540 100644 --- a/environment.yml +++ b/environment.yml @@ -38,7 +38,7 @@ dependencies: - furo # dev, docs - annotated-types # dev, tests - pip: # dev, tests, docs - - DFO-LS # dev, tests + - DFO-LS>=1.5.3 # dev, tests - Py-BOBYQA # dev, tests - fides==0.7.4 # dev, tests - kaleido # dev, tests diff --git a/src/optimagic/optimizers/nag_optimizers.py b/src/optimagic/optimizers/nag_optimizers.py index a83ac7900..e708b5915 100644 --- a/src/optimagic/optimizers/nag_optimizers.py +++ b/src/optimagic/optimizers/nag_optimizers.py @@ -630,7 +630,7 @@ def nag_dfols_internal( fun=res["solution_criterion"], success=res["success"], message=res["message"], - n_iterations=int(res["n_iterations"]), + n_iterations=res["n_iterations"], n_fun_evals=res["n_fun_evals"], ) return out @@ -857,7 +857,7 @@ def nag_pybobyqa_internal( fun=res["solution_criterion"], success=res["success"], message=res["message"], - n_iterations=int(res["n_iterations"]), + n_iterations=res["n_iterations"], ) return out @@ -890,9 +890,8 @@ def _process_nag_result(nag_result_obj, len_x): "diagnostic_info": nag_result_obj.diagnostic_info, } try: - processed["n_iterations"] = nag_result_obj.diagnostic_info["iters_total"].iloc[ - -1 - ] + n_iterations = int(nag_result_obj.diagnostic_info["iters_total"].iloc[-1]) + processed["n_iterations"] = n_iterations except (KeyboardInterrupt, SystemExit): raise except Exception: diff --git a/tests/optimagic/optimization/test_many_algorithms.py b/tests/optimagic/optimization/test_many_algorithms.py index 52b7df33c..1882aeb08 100644 --- a/tests/optimagic/optimization/test_many_algorithms.py +++ b/tests/optimagic/optimization/test_many_algorithms.py @@ -86,3 +86,15 @@ def test_global_algorithms_on_sum_of_squares(algorithm): ) assert res.success in [True, None] aaae(res.params, np.array([0.2, 0]), decimal=1) + + +def test_nag_dfols_starting_at_optimum(): + # From issue: https://github.com/optimagic-dev/optimagic/issues/538 + params = np.zeros(2, dtype=float) + res = minimize( + fun=sos, + params=params, + algorithm="nag_dfols", + bounds=Bounds(-1 * np.ones_like(params), np.ones_like(params)), + ) + aaae(res.params, params) From 7cb7710b8c336fc7dbaddfe8da0287253b4ef4cd Mon Sep 17 00:00:00 2001 From: Mariam Petrosyan Date: Tue, 5 Nov 2024 08:15:02 +0100 Subject: [PATCH 3/3] Which Optimizer to Choose (#536) --- .pre-commit-config.yaml | 4 +- .tools/envs/testenv-linux.yml | 3 +- .tools/envs/testenv-numpy.yml | 3 +- .tools/envs/testenv-others.yml | 3 +- .tools/envs/testenv-pandas.yml | 3 +- docs/rtd_environment.yml | 3 + docs/source/conf.py | 4 + .../how_to/how_to_algorithm_selection.ipynb | 265 ++++++++++++++---- docs/source/how_to/how_to_globalization.ipynb | 20 ++ docs/source/how_to/index.md | 1 + environment.yml | 3 +- 11 files changed, 249 insertions(+), 63 deletions(-) create mode 100644 docs/source/how_to/how_to_globalization.ipynb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f5a8c66a3..aacd207c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,7 +68,7 @@ repos: - --blank exclude: src/optimagic/optimization/algo_options.py - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.7.2 hooks: # Run the linter. - id: ruff @@ -108,7 +108,7 @@ repos: - '88' files: (docs/.) 
- repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.0 hooks: - id: nbstripout exclude: | diff --git a/.tools/envs/testenv-linux.yml b/.tools/envs/testenv-linux.yml index 827c56d40..1e1c0846f 100644 --- a/.tools/envs/testenv-linux.yml +++ b/.tools/envs/testenv-linux.yml @@ -8,7 +8,7 @@ dependencies: - jax - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -37,4 +37,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-numpy.yml b/.tools/envs/testenv-numpy.yml index ef75ece28..34681b9ba 100644 --- a/.tools/envs/testenv-numpy.yml +++ b/.tools/envs/testenv-numpy.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-others.yml b/.tools/envs/testenv-others.yml index 9b81be122..444205593 100644 --- a/.tools/envs/testenv-others.yml +++ b/.tools/envs/testenv-others.yml @@ -6,7 +6,7 @@ channels: dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -35,4 +35,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/.tools/envs/testenv-pandas.yml b/.tools/envs/testenv-pandas.yml index d7e99f2be..ff4996dc5 100644 --- a/.tools/envs/testenv-pandas.yml +++ b/.tools/envs/testenv-pandas.yml @@ -8,7 +8,7 @@ dependencies: - numpy<2 - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - - nlopt # dev, tests + - nlopt # dev, tests, docs - pip # dev, tests, docs - pytest # dev, tests - pytest-cov # tests @@ -34,4 +34,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs - -e ../../ diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 8407ede25..1929ec914 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -27,6 +27,7 @@ dependencies: - patsy - joblib - plotly + - nlopt - annotated-types - pip: - ../ @@ -38,3 +39,5 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs + - fides==0.7.4 # dev, tests diff --git a/docs/source/conf.py b/docs/source/conf.py index 38a4c6bb4..bf50593c3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -48,6 +48,7 @@ "sphinxcontrib.bibtex", "sphinx_panels", "sphinx_design", + "sphinxcontrib.mermaid", ] myst_enable_extensions = [ @@ -55,6 +56,9 @@ "dollarmath", "html_image", ] +myst_fence_as_directive = ["mermaid"] + + copybutton_prompt_text = ">>> " copybutton_only_copy_prompt_lines = False diff --git a/docs/source/how_to/how_to_algorithm_selection.ipynb b/docs/source/how_to/how_to_algorithm_selection.ipynb index 1620e7225..fceb44038 100644 --- a/docs/source/how_to/how_to_algorithm_selection.ipynb +++ 
b/docs/source/how_to/how_to_algorithm_selection.ipynb @@ -5,26 +5,86 @@ "metadata": {}, "source": [ "(how-to-select-algorithms)=\n", + "# How to select a local optimizer\n", "\n", - "# Which optimizer to use\n", + "This guide explains how to choose a local optimizer that works well for your problem. \n", + "Depending on your [strategy for global optimization](how_to_globalization.ipynb) it \n", + "is also relevant for global optimization problems. \n", "\n", - "This is a very very very short and oversimplifying guide on selecting an optimization algorithm based on a minimum of information. \n", + "## Important facts \n", "\n", + "- There is no optimizer that works well for all problems \n", + "- Making the right choice can lead to enormous speedups\n", + "- Making the wrong choice can mean that you [don't solve your problem at all](algo-selection-how-important). Sometimes,\n", + "optimizers fail silently!\n", "\n", - "To select an optimizer, you need to answer two questions:\n", "\n", - "1. Is your criterion function differentiable?\n", + "## The three steps for selecting algorithms\n", "\n", - "2. Do you have a nonlinear least squares structure (i.e. do you sum some kind of squared residuals at the end of your criterion function)?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define some inputs\n", + "Algorithm selection is a mix of theory and experimentation. We recommend the following \n", + "steps:\n", + "\n", + "1. **Theory**: Based on the properties of your problem, start with 3 to 5 candidate algorithms. \n", + "You may use the decision tree below.\n", + "2. **Experiments**: Run the candidate algorithms for a small number of function \n", + "evaluations and compare the results in a *criterion plot*. As a rule of thumb, use \n", + "between `n_params` and `10 * n_params` evaluations. \n", + "3. **Optimization**: Re-run the algorithm with the best results until \n", + "convergence. Use the best parameter vector from the experiments as start parameters.\n", + "\n", + "We will walk you through the steps in an [example](algo-selection-example-problem)\n", + "below. These steps work well for most problems but sometimes you need \n", + "[variations](algo-selection-steps-variations).\n", + "\n", + "\n", + "## A decision tree \n", + "\n", + "This is a practical guide for narrowing down the set of algorithms to experiment with:\n", + "\n", + "```{mermaid}\n", + "graph LR\n", + " classDef highlight fill:#FF4500;\n", + " A[\"Do you have
nonlinear
constraints?\"] -- yes --> B[\"differentiable?\"]\n", + " B[\"Is your objective function differentiable?\"] -- yes --> C[\"ipopt
nlopt_slsqp
scipy_trust_constr\"]\n", + " B[\"differentiable?\"] -- no --> D[\"scipy_cobyla
nlopt_cobyla\"]\n", + "\n", + " A[\"Do you have
nonlinear constraints?\"] -- no --> E[\"Can you exploit
a least-squares
structure?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- yes --> F[\"differentiable?\"]\n", + " E[\"Can you exploit
a least-squares
structure?\"] -- no --> G[\"differentiable?\"]\n", + "\n", + " F[\"differentiable?\"] -- yes --> H[\"scipy_ls_lm
scipy_ls_trf
scipy_ls_dogbox\"]\n", "    F[\"differentiable?\"] -- no --> I[\"nag_dfols
pounders
tao_pounders\"]\n", + "\n", + " G[\"differentiable?\"] -- yes --> J[\"scipy_lbfgsb
nlopt_lbfgsb
fides\"]\n", + " G[\"differentiable?\"] -- no --> K[\"nlopt_bobyqa
nlopt_neldermead
neldermead_parallel\"]\n", "\n", - "Again, we use versions of the sphere function to illustrate how to select these algorithms in practice" + "```\n", + "\n", + "Going through the different questions will give you a list of candidate algorithms. \n", + "All algorithms in that list are designed for the same problem class but use different \n", + "approaches to solve the problem. Which of them works best for your problem can only be \n", + "found out through experimentation.\n", + "\n", + "```{note}\n", + "Many books on numerical optimization focus strongly on the inner workings of algorithms.\n", + "They will, for example, describe the difference between a trust-region algorithm and a \n", + "line-search algorithm in a lot of detail. We have an [intuitive explanation](../explanation/explanation_of_numerical_optimizers.md) of this too. Understanding these details is important for configuring and\n", + "troubleshooting optimizations, but not for algorithm selection. For example, If you have\n", + "a scalar, differentiable problem without nonlinear constraints, the decision tree \n", + "suggests `fides` and two variants of `lbfgsb`. `fides` is a trust-region algorithm, \n", + "`lbfgsb` is a line-search algorithm. Both are designed to solve the same kinds of \n", + "problems and which one works best needs to be found out through experimentation.\n", + "```\n", + "\n", + "(algo-selection-example-problem)=\n", + "\n", + "## An example problem\n", + "\n", + "As an example we use the [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The Trid function has no local minimum except \n", + "the global one. It is defined for any number of dimensions, we will pick 20. As starting \n", + "values we will pick the vector [0, 1, ..., 19]. \n", + "\n", + "A Python implementation of the function and its gradient looks like this:" ] }, { @@ -33,9 +93,9 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", + "import warnings\n", "\n", - "import optimagic as om" + "warnings.filterwarnings(\"ignore\")" ] }, { @@ -44,25 +104,57 @@ "metadata": {}, "outputs": [], "source": [ - "@om.mark.least_squares\n", - "def sphere(params):\n", - " return params\n", + "import numpy as np\n", + "\n", + "import optimagic as om\n", "\n", "\n", - "def sphere_gradient(params):\n", - " return params * 2\n", + "def trid_scalar(x):\n", + " \"\"\"Implement Trid function: https://www.sfu.ca/~ssurjano/trid.html.\"\"\"\n", + " return ((x - 1) ** 2).sum() - (x[1:] * x[:-1]).sum()\n", "\n", "\n", - "start_params = np.arange(5)" + "def trid_gradient(x):\n", + " \"\"\"Calculate gradient of trid function.\"\"\"\n", + " l1 = np.insert(x, 0, 0)\n", + " l1 = np.delete(l1, [-1])\n", + " l2 = np.append(x, 0)\n", + " l2 = np.delete(l2, [0])\n", + " return 2 * (x - 1) - l1 - l2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Differentiable criterion function\n", + "### Step 1: Theory\n", + "\n", + "\n", + "\n", + "Let's go through the decision tree for the Trid function:\n", + "\n", + "1. **No** nonlinear constraints our solution needs to satisfy\n", + "2. **No** least-squares structure we can exploit \n", + "3. **Yes**, the function is differentiable. We even have a closed form gradient that \n", + "we would like to use. \n", + "\n", + "We therefore end up with the candidate algorithms `scipy_lbfgsb`, `nlopt_lbfgsb`, and \n", + "`fides`.\n", "\n", - "Use `scipy_lbfsgsb` as optimizer and provide the closed form derivative if you can. If you do not provide a derivative, optimagic will calculate it numerically. 
However, this is less precise and slower. " + "```{note}\n", + "If your function is differentiable but you do not have a closed form gradient (yet), \n", + "we suggest to use at least one gradient based optimizer and one gradient free optimizer.\n", + "in your experiments. Optimagic will use numerical gradients in that case. For details, \n", + "see [here](how_to_derivatives.ipynb).\n", + "```\n", + "\n", + "\n", + "### Step 2: Experiments\n", + "\n", + "To find out which algorithms work well for our problem, we simply run optimizations with\n", + "all candidate algorithms in a loop and store the result in a dictionary. We limit the \n", + "number of function evaluations to 8. Since some algorithms only support a maximum number\n", + "of iterations as stopping criterion we also limit the number of iterations to 8.\n" ] }, { @@ -71,40 +163,57 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"scipy_lbfgsb\",\n", - " jac=sphere_gradient,\n", - ")\n", - "res.n_fun_evals" + "results = {}\n", + "for algo in [\"scipy_lbfgsb\", \"nlopt_lbfgsb\", \"fides\"]:\n", + " results[algo] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", + " algorithm=algo,\n", + " algo_options={\"stopping_maxfun\": 8, \"stopping_maxiter\": 8},\n", + " )\n", + "\n", + "fig = om.criterion_plot(results, max_evaluations=8)\n", + "fig.show(renderer=\"png\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that this solves a 5 dimensional problem with just 3 criterion evaluations. For higher dimensions, you will need more, but it scales very well to dozens and hundreds of parameters. \n", + "All optimizers work pretty well here and since this is a very simple problem, any of them \n", + "would probably find the optimum in a reasonable time. However, `nlopt_lbfgsb` is a bit \n", + "better than the others, so we will select it for the next step. In more difficult\n", + "examples, the difference between optimizers can be much more pronounced.\n", "\n", - "If you are worried about being stuck in a local optimum, use multistart optimization." + "### Step 3: Optimization \n", + "\n", + "All that is left to do is to run the optimization until convergence with the best \n", + "optimizer. To avoid duplicated calculations, we can already start from the previously \n", + "best parameter vector:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Not differentiable, only scalar output" + "best_x = results[\"nlopt_lbfgsb\"].params\n", + "results[\"nlopt_lbfgsb_complete\"] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=best_x,\n", + " algorithm=\"nlopt_lbfgsb\",\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Use `nag_pybobyqa`. Note that for this you need to install the `PyBOBYQA` package if you do not already have it:\n", - " \n", - "`pip install Py-BOBYQA`\n", - "\n", - "Then you select the algorithm as follows:" + "Looking at the result in a criterion plot we can see that the optimizer converges after \n", + "a bit more than 30 function evaluations. 
" ] }, { @@ -113,26 +222,75 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_pybobyqa\",\n", - ")\n", - "res.n_fun_evals" + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Not differentiable, least squares structure\n", + "(algo-selection-steps-variations)=\n", + "\n", + "## Variations of the four steps\n", + "\n", + "The four steps described above work very well in most situations. However, sometimes \n", + "it makes sense to deviate: \n", "\n", - "Use `nag_dfols`. To use `nag_dfols`, you need to install it via:\n", + "- If you are unsure about some of the questions in step 1, select more algorithms for \n", + "the experimentation phase and run more than 1 algorithm until convergence. \n", + "- If it is very important to find a precise optimum, run more than 1 algorithm until \n", + "convergence. \n", + "- If you have a very fast objective function, simply run all candidate algorithms until \n", + "convergence. \n", + "- If you have a differentiable objective function but no closed form derivative, use \n", + "at least one gradient based optimizer and one gradient free optimizer in the \n", + "experiments. See [here](how_to_derivatives.ipynb) to learn more about derivatives.\n", "\n", - "`pip install DFO-LS`\n", "\n", + "(algo-selection-how-important)=\n", + "\n", + "## How important was it?\n", + "\n", + "The Trid function is differentiable and very well behaved in almost every aspect. \n", + "Moreover, it has a very short runtime. One would think that any optimizer can find its \n", + "optimum. So let's compare the selected optimizer with a few others:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = {}\n", + "for algo in [\"nlopt_lbfgsb\", \"scipy_neldermead\", \"scipy_cobyla\"]:\n", + " results[algo] = om.minimize(\n", + " fun=trid_scalar,\n", + " jac=trid_gradient,\n", + " params=np.arange(20),\n", + " algorithm=algo,\n", + " )\n", + "\n", + "fig = om.criterion_plot(results)\n", + "fig.show(renderer=\"png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that our chosen optimizer solves the problem with less than 35 function \n", + "evaluations. At this point, the two gradient-free optimizers have not yet made \n", + "significant progress. CoByLA gets reasonably close to an optimum after about 4k \n", + "evaluations. Nelder-Mead gets stuck after 8k evaluations and fails to solve the problem. \n", "\n", - "This optimizer will only work if your criterion function returns a dictionary that contains the entry `root_contributions`. This needs to be a numpy array or pytree that contains the residuals of the least squares problem. " + "This example shows not only that the choice of optimizer is important but that the commonly \n", + "held belief that gradient free optimizers are generally more robust than gradient based \n", + "ones is dangerous! The Nelder-Mead algorithm did \"converge\" and reports success, but\n", + "did not find the optimum. It did not even get stuck in a local optimum because we know \n", + "that the Trid function does not have local optima except the global one. It just got \n", + "stuck somewhere. 
" ] }, { @@ -141,12 +299,7 @@ "metadata": {}, "outputs": [], "source": [ - "res = om.minimize(\n", - " fun=sphere,\n", - " params=start_params,\n", - " algorithm=\"nag_dfols\",\n", - ")\n", - "res.n_fun_evals" + "results[\"scipy_neldermead\"].success" ] } ], @@ -166,7 +319,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/docs/source/how_to/how_to_globalization.ipynb b/docs/source/how_to/how_to_globalization.ipynb new file mode 100644 index 000000000..1ddf45cbc --- /dev/null +++ b/docs/source/how_to/how_to_globalization.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to choose a strategy for global optimization\n", + "\n", + "(to be written)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/how_to/index.md b/docs/source/how_to/index.md index 8b7cdd37d..412b0f252 100644 --- a/docs/source/how_to/index.md +++ b/docs/source/how_to/index.md @@ -15,6 +15,7 @@ how_to_derivatives how_to_algorithm_selection how_to_bounds how_to_constraints +how_to_globalization how_to_multistart how_to_visualize_histories how_to_specify_algorithm_and_algo_options diff --git a/environment.yml b/environment.yml index a608d5540..cfd6bf6eb 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - cyipopt>=1.4.0 # dev, tests - pygmo>=2.19.0 # dev, tests - jupyterlab # dev, docs - - nlopt # dev, tests + - nlopt # dev, tests, docs - pdbpp # dev - pip # dev, tests, docs - pytest # dev, tests @@ -50,3 +50,4 @@ dependencies: - types-openpyxl # dev, tests - types-jinja2 # dev, tests - sqlalchemy-stubs # dev, tests + - sphinxcontrib-mermaid # dev, tests, docs