Commit
Add plain Gradient Descent optimizer (Qiskit/qiskit#6459)
* implement plain Gradient Descent optimizer
* replace leftover "SPSA" -> "GradientDescent"
* update docstring in init
* lint
* add a proper docstring with examples
* add callback test, mark slow test as slow_test
* add reno
* add test for learning rate as iterator
* add stepsize to callback
* Update qiskit/algorithms/optimizers/gradient_descent.py: Add copyright
* fix SPSA -> Gradient descent in docstrings

Co-authored-by: Steve Wood <40241007+woodsp-ibm@users.noreply.github.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent e2b7c86 · commit bd3697f
Showing 4 changed files with 335 additions and 4 deletions.
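For context, here is a minimal sketch of the callback hook this commit adds ("add stepsize to callback"). The recording logic and the toy objective are illustrative, not part of the commit; the callback signature follows the docstring in the new module below.

import numpy as np
from qiskit.algorithms.optimizers import GradientDescent

history = []  # one (nfev, parameters, value, gradient norm) tuple per iteration

def record(nfev, params, value, stepsize):
    history.append((nfev, params, value, stepsize))

def f(x):
    return (np.linalg.norm(x) - 1) ** 2

optimizer = GradientDescent(maxiter=20, callback=record)
optimizer.optimize(3, f, initial_point=np.array([1.0, 0.5, -0.2]))

# Expect up to ``maxiter`` entries, one per iteration.
print(len(history))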
@@ -0,0 +1,201 @@
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""A standard gradient descent optimizer."""

from typing import Iterator, Optional, Union, Callable
from functools import partial

import numpy as np

from .optimizer import Optimizer, OptimizerSupportLevel

CALLBACK = Callable[[int, np.ndarray, float, float], None]


class GradientDescent(Optimizer):
    r"""The gradient descent minimization routine.

    For a function :math:`f` and an initial point :math:`\vec\theta_0`, the standard (or "vanilla")
    gradient descent method is an iterative scheme to find the minimum :math:`\vec\theta^*` of
    :math:`f` by updating the parameters in the direction of the negative gradient of :math:`f`,

    .. math::

        \vec\theta_{n+1} = \vec\theta_{n} - \eta \vec\nabla f(\vec\theta_{n}),

    for a small learning rate :math:`\eta > 0`.

    You can either provide the analytic gradient :math:`\vec\nabla f` as ``gradient_function``
    in the ``optimize`` method, or, if you do not provide it, a finite difference approximation
    of the gradient is used. To adapt the size of the perturbation in the finite difference
    gradients, set the ``perturbation`` property in the initializer.

    This optimizer supports a callback function. If provided in the initializer, the optimizer
    will call the callback in each iteration with the following information in this order:
    current number of function evaluations, current parameters, current function value, norm of
    the current gradient.

    Examples:

        A minimal example that will use finite difference gradients with a default perturbation
        of 0.01 and a default learning rate of 0.01.

        .. code-block:: python

            import numpy as np
            from qiskit.algorithms.optimizers import GradientDescent

            def f(x):
                return (np.linalg.norm(x) - 1) ** 2

            initial_point = np.array([1, 0.5, -0.2])

            optimizer = GradientDescent(maxiter=100)
            x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
                                                      f,
                                                      initial_point=initial_point)

            print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")

        An example where the learning rate is an iterator and we supply the analytic gradient.
        Note how much faster this converges (i.e. fewer ``nfevs``) compared to the previous
        example.

        .. code-block:: python

            import numpy as np
            from qiskit.algorithms.optimizers import GradientDescent

            def learning_rate():
                power = 0.6
                constant_coeff = 0.1

                def powerlaw():
                    n = 0
                    while True:
                        yield constant_coeff * (n ** power)
                        n += 1

                return powerlaw()

            def f(x):
                return (np.linalg.norm(x) - 1) ** 2

            def grad_f(x):
                return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

            initial_point = np.array([1, 0.5, -0.2])

            optimizer = GradientDescent(maxiter=100, learning_rate=learning_rate)
            x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
                                                      f,
                                                      gradient_function=grad_f,
                                                      initial_point=initial_point)

            print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")
    """

    def __init__(
        self,
        maxiter: int = 100,
        learning_rate: Union[float, Callable[[], Iterator]] = 0.01,
        tol: float = 1e-7,
        callback: Optional[CALLBACK] = None,
        perturbation: Optional[float] = None,
    ) -> None:
        r"""
        Args:
            maxiter: The maximum number of iterations.
            learning_rate: A constant or generator yielding learning rates for the parameter
                updates. See the docstring for an example.
            tol: If the norm of the parameter update is smaller than this threshold, the
                optimizer has converged.
            callback: A callback function called in each iteration with, in this order: the
                current number of function evaluations, current parameters, current function
                value, and the norm of the current gradient.
            perturbation: If no gradient is passed to ``GradientDescent.optimize`` the gradient is
                approximated with a symmetric finite difference scheme with ``perturbation``
                perturbation in both directions (defaults to 1e-2 if required).
                Ignored if a gradient callable is passed to ``GradientDescent.optimize``.
        """
        super().__init__()

        self.maxiter = maxiter
        self.learning_rate = learning_rate
        self.perturbation = perturbation
        self.tol = tol
        self.callback = callback

    def _minimize(self, loss, grad, initial_point):
        # set learning rate
        if isinstance(self.learning_rate, float):
            eta = constant(self.learning_rate)
        else:
            eta = self.learning_rate()

        if grad is None:
            eps = 0.01 if self.perturbation is None else self.perturbation
            grad = partial(
                Optimizer.gradient_num_diff,
                f=loss,
                epsilon=eps,
                max_evals_grouped=self._max_evals_grouped,
            )

        # prepare some initials
        x = np.asarray(initial_point)
        nfevs = 0

        for _ in range(1, self.maxiter + 1):
            # compute update -- gradient evaluation counts as one function evaluation
            update = grad(x)
            nfevs += 1

            # compute next parameter value
            x_next = x - next(eta) * update

            # send information to callback
            stepsize = np.linalg.norm(update)
            if self.callback is not None:
                self.callback(nfevs, x_next, loss(x_next), stepsize)

            # update parameters
            x = x_next

            # check termination
            if stepsize < self.tol:
                break

        return x, loss(x), nfevs

    def get_support_level(self):
        """Get the support level dictionary."""
        return {
            "gradient": OptimizerSupportLevel.supported,
            "bounds": OptimizerSupportLevel.ignored,
            "initial_point": OptimizerSupportLevel.required,
        }

    # pylint: disable=unused-argument
    def optimize(
        self,
        num_vars,
        objective_function,
        gradient_function=None,
        variable_bounds=None,
        initial_point=None,
    ):
        return self._minimize(objective_function, gradient_function, initial_point)


def constant(eta=0.01):
    """Yield a constant."""

    while True:
        yield eta
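Because ``learning_rate`` accepts any zero-argument callable returning an iterator (``eta = self.learning_rate()`` above), schedules other than the power law in the docstring can be plugged in. A sketch with a hypothetical exponentially decaying schedule (the constants 0.2 and 0.9 are made up for illustration):

import numpy as np
from qiskit.algorithms.optimizers import GradientDescent

def decaying_rate():
    """Hypothetical schedule eta_n = 0.2 * 0.9**n."""
    n = 0
    while True:
        yield 0.2 * 0.9 ** n
        n += 1

def f(x):
    return (np.linalg.norm(x) - 1) ** 2

def grad_f(x):
    return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

initial_point = np.array([1.0, 0.5, -0.2])

# Pass the generator function itself; the optimizer calls it once to obtain the iterator.
optimizer = GradientDescent(maxiter=50, learning_rate=decaying_rate)
x_opt, fx_opt, nfevs = optimizer.optimize(
    initial_point.size, f, gradient_function=grad_f, initial_point=initial_point
)
print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")

The second new file in the commit, shown below, adds the tests.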
@@ -0,0 +1,116 @@
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Tests for the Gradient Descent optimizer."""

from test.python.algorithms import QiskitAlgorithmsTestCase

import numpy as np

from qiskit.algorithms.optimizers import GradientDescent
from qiskit.circuit.library import PauliTwoDesign
from qiskit.opflow import I, Z, StateFn
from qiskit.test.decorators import slow_test


class TestGradientDescent(QiskitAlgorithmsTestCase):
    """Tests for the gradient descent optimizer."""

    def setUp(self):
        super().setUp()
        np.random.seed(12)

    @slow_test
    def test_pauli_two_design(self):
        """Test standard gradient descent on the Pauli two-design example."""
        circuit = PauliTwoDesign(3, reps=3, seed=2)
        parameters = list(circuit.parameters)
        obs = Z ^ Z ^ I
        expr = ~StateFn(obs) @ StateFn(circuit)

        initial_point = np.array(
            [
                0.1822308,
                -0.27254251,
                0.83684425,
                0.86153976,
                -0.7111668,
                0.82766631,
                0.97867993,
                0.46136964,
                2.27079901,
                0.13382699,
                0.29589915,
                0.64883193,
            ]
        )

        def objective(x):
            return expr.bind_parameters(dict(zip(parameters, x))).eval().real

        optimizer = GradientDescent(maxiter=100, learning_rate=0.1, perturbation=0.1)

        result = optimizer.optimize(circuit.num_parameters, objective, initial_point=initial_point)

        self.assertLess(result[1], -0.95)  # final loss
        self.assertEqual(result[2], 100)  # function evaluations

    def test_callback(self):
        """Test the callback."""

        history = []

        def callback(*args):
            history.append(args)

        optimizer = GradientDescent(maxiter=1, callback=callback)

        def objective(x):
            return np.linalg.norm(x)

        _ = optimizer.optimize(2, objective, initial_point=np.array([1, -1]))

        self.assertEqual(len(history), 1)
        self.assertIsInstance(history[0][0], int)  # nfevs
        self.assertIsInstance(history[0][1], np.ndarray)  # parameters
        self.assertIsInstance(history[0][2], float)  # function value
        self.assertIsInstance(history[0][3], float)  # norm of the gradient

    def test_iterator_learning_rate(self):
        """Test setting the learning rate as iterator."""

        def learning_rate():
            power = 0.6
            constant_coeff = 0.1

            def powerlaw():
                n = 0
                while True:
                    yield constant_coeff * (n ** power)
                    n += 1

            return powerlaw()

        def objective(x):
            return (np.linalg.norm(x) - 1) ** 2

        def grad(x):
            return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

        initial_point = np.array([1, 0.5, -2])

        optimizer = GradientDescent(maxiter=20, learning_rate=learning_rate)
        _, fx_opt, _ = optimizer.optimize(
            initial_point.size, objective, gradient_function=grad, initial_point=initial_point
        )

        self.assertLess(fx_opt, 1e-5)
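One behaviour worth noting from ``_minimize``: the loop breaks as soon as the gradient norm falls below ``tol``, so on a simple quadratic with an analytic gradient the run stops well before ``maxiter``. A sketch (the learning rate and objective below are illustrative, not from the commit):

import numpy as np
from qiskit.algorithms.optimizers import GradientDescent

def objective(x):
    return float(np.dot(x, x))  # quadratic bowl with minimum at the origin

def grad(x):
    return 2 * x  # analytic gradient

optimizer = GradientDescent(maxiter=100, learning_rate=0.4)
x_opt, fx_opt, nfevs = optimizer.optimize(
    2, objective, gradient_function=grad, initial_point=np.array([1.0, 1.0])
)

# Each step scales x by 0.2, so the gradient norm drops below the default
# tol of 1e-7 after roughly a dozen iterations and nfevs stays far below 100.
print(x_opt, fx_opt, nfevs)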