diff --git a/qiskit/algorithms/optimizers/__init__.py b/qiskit/algorithms/optimizers/__init__.py
index b7649ca7e1bf..7dc820edb08d 100644
--- a/qiskit/algorithms/optimizers/__init__.py
+++ b/qiskit/algorithms/optimizers/__init__.py
@@ -50,6 +50,7 @@
    COBYLA
    L_BFGS_B
    GSLS
+   GradientDescent
    NELDER_MEAD
    NFT
    P_BFGS
@@ -104,6 +105,7 @@
 from .cg import CG
 from .cobyla import COBYLA
 from .gsls import GSLS
+from .gradient_descent import GradientDescent
 from .imfil import IMFIL
 from .l_bfgs_b import L_BFGS_B
 from .nelder_mead import NELDER_MEAD
@@ -131,6 +133,7 @@
     "CG",
     "COBYLA",
     "GSLS",
+    "GradientDescent",
     "L_BFGS_B",
     "NELDER_MEAD",
     "NFT",
diff --git a/qiskit/algorithms/optimizers/gradient_descent.py b/qiskit/algorithms/optimizers/gradient_descent.py
new file mode 100644
index 000000000000..ced0abb949fd
--- /dev/null
+++ b/qiskit/algorithms/optimizers/gradient_descent.py
@@ -0,0 +1,203 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""A standard gradient descent optimizer."""
+
+from typing import Iterator, Optional, Union, Callable
+from functools import partial
+
+import numpy as np
+
+from .optimizer import Optimizer, OptimizerSupportLevel
+
+CALLBACK = Callable[[int, np.ndarray, float, float], None]
+
+
+class GradientDescent(Optimizer):
+    r"""The gradient descent minimization routine.
+
+    For a function :math:`f` and an initial point :math:`\vec\theta_0`, the standard (or "vanilla")
+    gradient descent method is an iterative scheme to find the minimum :math:`\vec\theta^*` of
+    :math:`f` by updating the parameters in the direction of the negative gradient of :math:`f`
+
+    .. math::
+
+        \vec\theta_{n+1} = \vec\theta_{n} - \eta \nabla f(\vec\theta_{n}),
+
+    for a small learning rate :math:`\eta > 0`.
+
+    You can either provide the analytic gradient :math:`\nabla f` as ``gradient_function``
+    in the ``optimize`` method, or, if it is not provided, a finite difference approximation
+    of the gradient is used. To adapt the size of the perturbation in the finite difference
+    gradients, set the ``perturbation`` property in the initializer.
+
+    This optimizer supports a callback function. If provided in the initializer, the optimizer
+    will call the callback in each iteration with the following information, in this order:
+    current number of function evaluations, current parameters, current function value, norm of
+    the current gradient.
+
+    Examples:
+
+        A minimal example that uses finite difference gradients with a default perturbation
+        of 0.01 and a default learning rate of 0.01.
+
+        .. code-block:: python
+
+            import numpy as np
+            from qiskit.algorithms.optimizers import GradientDescent
+
+            def f(x):
+                return (np.linalg.norm(x) - 1) ** 2
+
+            initial_point = np.array([1, 0.5, -0.2])
+
+            optimizer = GradientDescent(maxiter=100)
+            x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
+                                                      f,
+                                                      initial_point=initial_point)
+
+            print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")
+
+        An example where the learning rate is an iterator and we supply the analytic gradient.
+        Note how much faster this converges (i.e. fewer ``nfevs``) compared to the previous
+        example.
+
+        .. code-block:: python
+
+            import numpy as np
+            from qiskit.algorithms.optimizers import GradientDescent
+
+            def learning_rate():
+                power = 0.6
+                constant_coeff = 0.1
+
+                def powerlaw():
+                    n = 0
+                    while True:
+                        yield constant_coeff * (n ** power)
+                        n += 1
+
+                return powerlaw()
+
+            def f(x):
+                return (np.linalg.norm(x) - 1) ** 2
+
+            def grad_f(x):
+                return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)
+
+            initial_point = np.array([1, 0.5, -0.2])
+
+            optimizer = GradientDescent(maxiter=100, learning_rate=learning_rate)
+            x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
+                                                      f,
+                                                      gradient_function=grad_f,
+                                                      initial_point=initial_point)
+
+            print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")
+
+    """
+
+    def __init__(
+        self,
+        maxiter: int = 100,
+        learning_rate: Union[float, Callable[[], Iterator]] = 0.01,
+        tol: float = 1e-7,
+        callback: Optional[CALLBACK] = None,
+        perturbation: Optional[float] = None,
+    ) -> None:
+        r"""
+        Args:
+            maxiter: The maximum number of iterations.
+            learning_rate: A constant or generator yielding learning rates for the parameter
+                updates. See the docstring for an example.
+            tol: If the norm of the parameter update is smaller than this threshold, the
+                optimizer has converged.
+            perturbation: If no gradient is passed to ``GradientDescent.optimize``, the gradient is
+                approximated with a symmetric finite difference scheme using ``perturbation`` as
+                the shift in both directions (defaults to 1e-2 if needed).
+                Ignored if a gradient callable is passed to ``GradientDescent.optimize``.
+        """
+        super().__init__()
+
+        self.maxiter = maxiter
+        self.learning_rate = learning_rate
+        self.perturbation = perturbation
+        self.tol = tol
+        self.callback = callback
+
+    def _minimize(self, loss, grad, initial_point):
+        # set learning rate
+        if isinstance(self.learning_rate, float):
+            eta = constant(self.learning_rate)
+        else:
+            eta = self.learning_rate()
+
+        if grad is None:
+            eps = 0.01 if self.perturbation is None else self.perturbation
+            grad = partial(
+                Optimizer.gradient_num_diff,
+                f=loss,
+                epsilon=eps,
+                max_evals_grouped=self._max_evals_grouped,
+            )
+
+        # prepare initial values
+        x = np.asarray(initial_point)
+        nfevs = 0
+
+        for _ in range(1, self.maxiter + 1):
+            # compute update -- gradient evaluation counts as one function evaluation
+            update = grad(x)
+            nfevs += 1
+
+            # compute next parameter value
+            x_next = x - next(eta) * update
+
+            # send information to callback
+            stepsize = np.linalg.norm(update)
+            if self.callback is not None:
+                self.callback(nfevs, x_next, loss(x_next), stepsize)
+
+            # update parameters
+            x = x_next
+
+            # check termination
+            if stepsize < self.tol:
+                break
+
+        return x, loss(x), nfevs
+
+    def get_support_level(self):
+        """Get the support level dictionary."""
+        return {
+            "gradient": OptimizerSupportLevel.supported,
+            "bounds": OptimizerSupportLevel.ignored,
+            "initial_point": OptimizerSupportLevel.required,
+        }
+
+    # pylint: disable=unused-argument
+    def optimize(
+        self,
+        num_vars,
+        objective_function,
+        gradient_function=None,
+        variable_bounds=None,
+        initial_point=None,
+    ):
+        return self._minimize(objective_function, gradient_function, initial_point)
+
+
+def constant(eta=0.01):
+    """Yield a constant."""
+
+    while True:
+        yield eta
diff --git a/releasenotes/notes/gradient-descent-optimizer-96a7a0eeb9502cef.yaml b/releasenotes/notes/gradient-descent-optimizer-96a7a0eeb9502cef.yaml
new file mode 100644
index 000000000000..eee28b89187b
--- /dev/null
+++ b/releasenotes/notes/gradient-descent-optimizer-96a7a0eeb9502cef.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    Add the standard :class:`~qiskit.algorithms.optimizers.GradientDescent` optimizer
+    to the collection of optimizers in the :mod:`qiskit.algorithms.optimizers` module.
+    For a detailed description and examples of how to use this class, please refer
+    to the class docstring and the online documentation.
diff --git a/test/python/algorithms/optimizers/test_gradient_descent.py b/test/python/algorithms/optimizers/test_gradient_descent.py
new file mode 100644
index 000000000000..d59afe42bfda
--- /dev/null
+++ b/test/python/algorithms/optimizers/test_gradient_descent.py
@@ -0,0 +1,116 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Tests for the Gradient Descent optimizer."""
+
+from test.python.algorithms import QiskitAlgorithmsTestCase
+
+import numpy as np
+
+from qiskit.algorithms.optimizers import GradientDescent
+from qiskit.circuit.library import PauliTwoDesign
+from qiskit.opflow import I, Z, StateFn
+from qiskit.test.decorators import slow_test
+
+
+class TestGradientDescent(QiskitAlgorithmsTestCase):
+    """Tests for the gradient descent optimizer."""
+
+    def setUp(self):
+        super().setUp()
+        np.random.seed(12)
+
+    @slow_test
+    def test_pauli_two_design(self):
+        """Test standard gradient descent on the Pauli two-design example."""
+        circuit = PauliTwoDesign(3, reps=3, seed=2)
+        parameters = list(circuit.parameters)
+        obs = Z ^ Z ^ I
+        expr = ~StateFn(obs) @ StateFn(circuit)
+
+        initial_point = np.array(
+            [
+                0.1822308,
+                -0.27254251,
+                0.83684425,
+                0.86153976,
+                -0.7111668,
+                0.82766631,
+                0.97867993,
+                0.46136964,
+                2.27079901,
+                0.13382699,
+                0.29589915,
+                0.64883193,
+            ]
+        )
+
+        def objective(x):
+            return expr.bind_parameters(dict(zip(parameters, x))).eval().real
+
+        optimizer = GradientDescent(maxiter=100, learning_rate=0.1, perturbation=0.1)
+
+        result = optimizer.optimize(circuit.num_parameters, objective, initial_point=initial_point)
+
+        self.assertLess(result[1], -0.95)  # final loss
+        self.assertEqual(result[2], 100)  # function evaluations
+
+    def test_callback(self):
+        """Test the callback."""
+
+        history = []
+
+        def callback(*args):
+            history.append(args)
+
+        optimizer = GradientDescent(maxiter=1, callback=callback)
+
+        def objective(x):
+            return np.linalg.norm(x)
+
+        _ = optimizer.optimize(2, objective, initial_point=np.array([1, -1]))
+
+        self.assertEqual(len(history), 1)
+        self.assertIsInstance(history[0][0], int)  # nfevs
+        self.assertIsInstance(history[0][1], np.ndarray)  # parameters
+        self.assertIsInstance(history[0][2], float)  # function value
+        self.assertIsInstance(history[0][3], float)  # norm of the gradient
+
+    def test_iterator_learning_rate(self):
+        """Test setting the learning rate as iterator."""
+
+        def learning_rate():
+            power = 0.6
+            constant_coeff = 0.1
+
+            def powerlaw():
+                n = 0
+                while True:
+                    yield constant_coeff * (n ** power)
+                    n += 1
+
+            return powerlaw()
+
+        def objective(x):
+            return (np.linalg.norm(x) - 1) ** 2
+
+        def grad(x):
+            return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)
+
+        initial_point = np.array([1, 0.5, -2])
+
+        optimizer = GradientDescent(maxiter=20, learning_rate=learning_rate)
+        _, fx_opt, _ = optimizer.optimize(
+            initial_point.size, objective, gradient_function=grad, initial_point=initial_point
+        )
+
+        self.assertLess(fx_opt, 1e-5)
diff --git a/test/python/algorithms/optimizers/test_optimizers.py b/test/python/algorithms/optimizers/test_optimizers.py
index e446c23d4b33..4c8b197014dd 100644
--- a/test/python/algorithms/optimizers/test_optimizers.py
+++ b/test/python/algorithms/optimizers/test_optimizers.py
@@ -14,15 +14,15 @@
 
 import unittest
 from test.python.algorithms import QiskitAlgorithmsTestCase
-
 import numpy as np
-from scipy.optimize import rosen
+from scipy.optimize import rosen, rosen_der
 
 from qiskit.algorithms.optimizers import (
     ADAM,
     CG,
     COBYLA,
     GSLS,
+    GradientDescent,
     L_BFGS_B,
     NELDER_MEAD,
     P_BFGS,
@@ -42,9 +42,14 @@ def setUp(self):
         super().setUp()
         algorithm_globals.random_seed = 52
 
-    def _optimize(self, optimizer):
+    def _optimize(self, optimizer, grad=False):
         x_0 = [1.3, 0.7, 0.8, 1.9, 1.2]
-        res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
+        if grad:
+            res = optimizer.optimize(
+                len(x_0), rosen, gradient_function=rosen_der, initial_point=x_0
+            )
+        else:
+            res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
         np.testing.assert_array_almost_equal(res[0], [1.0] * len(x_0), decimal=2)
         return res
 
@@ -60,6 +65,12 @@ def test_cg(self):
         res = self._optimize(optimizer)
         self.assertLessEqual(res[2], 10000)
 
+    def test_gradient_descent(self):
+        """gradient descent test"""
+        optimizer = GradientDescent(maxiter=100000, tol=1e-06, learning_rate=1e-3)
+        res = self._optimize(optimizer, grad=True)
+        self.assertLessEqual(res[2], 100000)
+
     def test_cobyla(self):
         """cobyla test"""
         optimizer = COBYLA(maxiter=100000, tol=1e-06)
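Usage note (illustrative, not part of the patch): a minimal sketch of how the callback wired up in ``gradient_descent.py`` above could be used to record the optimization history. Only the ``GradientDescent`` constructor arguments, the ``optimize`` call, and the callback signature (evaluation count, parameters, function value, gradient norm) are taken from the patch; the quadratic ``objective`` and all other names below are assumptions made for the example.

import numpy as np

from qiskit.algorithms.optimizers import GradientDescent

history = []

def callback(nfevs, params, value, stepsize):
    # Matches the CALLBACK signature defined in gradient_descent.py:
    # evaluation count, current parameters, current loss value, gradient norm.
    history.append((nfevs, value, stepsize))

def objective(x):
    # Illustrative quadratic with its minimum at (1, 1); any smooth function works.
    return float(np.sum((x - 1) ** 2))

optimizer = GradientDescent(maxiter=50, learning_rate=0.1, callback=callback)
x_opt, fx_opt, nfevs = optimizer.optimize(2, objective, initial_point=np.array([0.5, -0.3]))

print(f"x_opt = {x_opt}, f(x_opt) = {fx_opt}, evaluations = {nfevs}")
print(f"callback recorded {len(history)} iterations")

Because no ``gradient_function`` is passed here, the gradient is approximated by finite differences with the default perturbation of 0.01, as described in the class docstring.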