Add plain Gradient Descent optimizer (Qiskit/qiskit#6459)
* implement plain Gradient Descent optimizer

* replace leftover "SPSA" -> "GradientDescent"

* update docstring in init

* lint

* add a proper docstring with examples

* add callback test, mark slow test as slow_test

* add reno

* add test for learning rate as iterator

* add stepsize to callback

* Update qiskit/algorithms/optimizers/gradient_descent.py

Add copyright

* fix SPSA -> Gradient descent in docstrings

Co-authored-by: Steve Wood <40241007+woodsp-ibm@users.noreply.github.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
3 people authored Jun 1, 2021
1 parent e2b7c86 commit bd3697f
Showing 4 changed files with 335 additions and 4 deletions.
3 changes: 3 additions & 0 deletions qiskit_algorithms/optimizers/__init__.py
@@ -50,6 +50,7 @@
COBYLA
L_BFGS_B
GSLS
GradientDescent
NELDER_MEAD
NFT
P_BFGS
@@ -104,6 +105,7 @@
from .cg import CG
from .cobyla import COBYLA
from .gsls import GSLS
from .gradient_descent import GradientDescent
from .imfil import IMFIL
from .l_bfgs_b import L_BFGS_B
from .nelder_mead import NELDER_MEAD
@@ -131,6 +133,7 @@
"CG",
"COBYLA",
"GSLS",
"GradientDescent",
"L_BFGS_B",
"NELDER_MEAD",
"NFT",
201 changes: 201 additions & 0 deletions qiskit_algorithms/optimizers/gradient_descent.py
@@ -0,0 +1,201 @@
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""A standard gradient descent optimizer."""

from typing import Iterator, Optional, Union, Callable
from functools import partial

import numpy as np

from .optimizer import Optimizer, OptimizerSupportLevel

CALLBACK = Callable[[int, np.ndarray, float, float], None]


class GradientDescent(Optimizer):
r"""The gradient descent minimization routine.
For a function :math:`f` and an initial point :math:`\vec\theta_0`, the standard (or "vanilla")
gradient descent method is an iterative scheme to find the minimum :math:`\vec\theta^*` of
:math:`f` by updating the parameters in the direction of the negative gradient of :math:`f`
.. math::
\vec\theta_{n+1} = \vec\theta_{n} - \vec\eta\nabla f(\vec\theta_{n}),
for a small learning rate :math:`\eta > 0`.
You can either provide the analytic gradient :math:`\vec\nabla f` as ``gradient_function``
in the ``optimize`` method, or, if you do not provide it, use a finite difference approximation
of the gradient. To adapt the size of the perturbation in the finite difference gradients,
set the ``perturbation`` property in the initializer.
This optimizer supports a callback function. If provided in the initializer, the optimizer
will call the callback in each iteration with the following information in this order:
current number of function values, current parameters, current function value, norm of current
gradient.
Examples:
A minimum example that will use finite difference gradients with a default perturbation
of 0.01 and a default learning rate of 0.01.
.. code-block::python
from qiskit.algorithms.optimizers import GradientDescent
def f(x):
return (np.linalg.norm(x) - 1) ** 2
initial_point = np.array([1, 0.5, -0.2])
optimizer = GradientDescent(maxiter=100)
x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
f,
initial_point=initial_point)
print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")
An example where the learning rate is an iterator and we supply the analytic gradient.
Note how much faster this convergences (i.e. less ``nfevs``) compared to the previous
example.
.. code-block::python
from qiskit.algorithms.optimizers import GradientDescent
def learning_rate():
power = 0.6
constant_coeff = 0.1
def powerlaw():
n = 0
while True:
yield constant_coeff * (n ** power)
n += 1
return powerlaw()
def f(x):
return (np.linalg.norm(x) - 1) ** 2
def grad_f(x):
return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)
initial_point = np.array([1, 0.5, -0.2])
optimizer = GradientDescent(maxiter=100, learning_rate=learning_rate)
x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
f,
gradient_function=grad_f,
initial_point=initial_point)
print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")
"""

def __init__(
self,
maxiter: int = 100,
learning_rate: Union[float, Callable[[], Iterator]] = 0.01,
tol: float = 1e-7,
callback: Optional[CALLBACK] = None,
perturbation: Optional[float] = None,
) -> None:
r"""
        Args:
            maxiter: The maximum number of iterations.
            learning_rate: A constant or generator yielding learning rates for the parameter
                updates. See the class docstring for an example.
            tol: If the norm of the parameter update is smaller than this threshold, the
                optimizer has converged.
            callback: A callback function. If provided, it is called in each iteration with the
                number of function evaluations, the current parameters, the current function
                value and the norm of the current gradient.
            perturbation: If no gradient is passed to ``GradientDescent.optimize``, the gradient is
                approximated with a symmetric finite difference scheme with perturbation
                ``perturbation`` in both directions (defaults to 1e-2 if required).
                Ignored if a gradient callable is passed to ``GradientDescent.optimize``.
"""
super().__init__()

self.maxiter = maxiter
self.learning_rate = learning_rate
self.perturbation = perturbation
self.tol = tol
self.callback = callback

def _minimize(self, loss, grad, initial_point):
# set learning rate
if isinstance(self.learning_rate, float):
eta = constant(self.learning_rate)
else:
eta = self.learning_rate()

if grad is None:
eps = 0.01 if self.perturbation is None else self.perturbation
grad = partial(
Optimizer.gradient_num_diff,
f=loss,
epsilon=eps,
max_evals_grouped=self._max_evals_grouped,
)

        # prepare the initial point and the function-evaluation counter
x = np.asarray(initial_point)
nfevs = 0

for _ in range(1, self.maxiter + 1):
# compute update -- gradient evaluation counts as one function evaluation
update = grad(x)
nfevs += 1

# compute next parameter value
x_next = x - next(eta) * update

# send information to callback
stepsize = np.linalg.norm(update)
if self.callback is not None:
self.callback(nfevs, x_next, loss(x_next), stepsize)

# update parameters
x = x_next

# check termination
if stepsize < self.tol:
break

return x, loss(x), nfevs

def get_support_level(self):
"""Get the support level dictionary."""
return {
"gradient": OptimizerSupportLevel.supported,
"bounds": OptimizerSupportLevel.ignored,
"initial_point": OptimizerSupportLevel.required,
}

# pylint: disable=unused-argument
def optimize(
self,
num_vars,
objective_function,
gradient_function=None,
variable_bounds=None,
initial_point=None,
):
return self._minimize(objective_function, gradient_function, initial_point)


def constant(eta=0.01):
"""Yield a constant."""

while True:
yield eta
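
For reference, the update loop in ``_minimize`` above reduces to a few lines of standalone NumPy. The sketch below is illustrative only and not part of the commit: it assumes a symmetric finite-difference gradient with perturbation ``eps``, the update x_{n+1} = x_n - eta * gradient, and termination on the gradient norm; the helper names ``finite_diff_gradient`` and ``vanilla_gd`` are made up here.

import numpy as np

def finite_diff_gradient(f, x, eps=0.01):
    # Symmetric finite differences, analogous in spirit to Optimizer.gradient_num_diff.
    grad = np.zeros_like(x, dtype=float)
    for i in range(len(x)):
        shift = np.zeros(len(x))
        shift[i] = eps
        grad[i] = (f(x + shift) - f(x - shift)) / (2 * eps)
    return grad

def vanilla_gd(f, x0, eta=0.01, maxiter=100, tol=1e-7, grad=None):
    # Plain gradient descent with a constant learning rate.
    x = np.asarray(x0, dtype=float)
    for _ in range(maxiter):
        g = grad(x) if grad is not None else finite_diff_gradient(f, x)
        x = x - eta * g
        if np.linalg.norm(g) < tol:  # same stopping criterion as _minimize
            break
    return x, f(x)

# Same toy objective as the class docstring: squared distance of ||x|| from 1.
x_opt, fx_opt = vanilla_gd(lambda x: (np.linalg.norm(x) - 1) ** 2,
                           np.array([1.0, 0.5, -0.2]), eta=0.1, maxiter=200)
print(x_opt, fx_opt)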
116 changes: 116 additions & 0 deletions test/optimizers/test_gradient_descent.py
@@ -0,0 +1,116 @@
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Tests for the Gradient Descent optimizer."""

from test.python.algorithms import QiskitAlgorithmsTestCase

import numpy as np

from qiskit.algorithms.optimizers import GradientDescent
from qiskit.circuit.library import PauliTwoDesign
from qiskit.opflow import I, Z, StateFn
from qiskit.test.decorators import slow_test


class TestGradientDescent(QiskitAlgorithmsTestCase):
"""Tests for the gradient descent optimizer."""

def setUp(self):
super().setUp()
np.random.seed(12)

@slow_test
def test_pauli_two_design(self):
"""Test standard gradient descent on the Pauli two-design example."""
circuit = PauliTwoDesign(3, reps=3, seed=2)
parameters = list(circuit.parameters)
obs = Z ^ Z ^ I
expr = ~StateFn(obs) @ StateFn(circuit)

initial_point = np.array(
[
0.1822308,
-0.27254251,
0.83684425,
0.86153976,
-0.7111668,
0.82766631,
0.97867993,
0.46136964,
2.27079901,
0.13382699,
0.29589915,
0.64883193,
]
)

def objective(x):
return expr.bind_parameters(dict(zip(parameters, x))).eval().real

optimizer = GradientDescent(maxiter=100, learning_rate=0.1, perturbation=0.1)

result = optimizer.optimize(circuit.num_parameters, objective, initial_point=initial_point)

self.assertLess(result[1], -0.95) # final loss
self.assertEqual(result[2], 100) # function evaluations

def test_callback(self):
"""Test the callback."""

history = []

def callback(*args):
history.append(args)

optimizer = GradientDescent(maxiter=1, callback=callback)

def objective(x):
return np.linalg.norm(x)

_ = optimizer.optimize(2, objective, initial_point=np.array([1, -1]))

self.assertEqual(len(history), 1)
self.assertIsInstance(history[0][0], int) # nfevs
self.assertIsInstance(history[0][1], np.ndarray) # parameters
self.assertIsInstance(history[0][2], float) # function value
self.assertIsInstance(history[0][3], float) # norm of the gradient

def test_iterator_learning_rate(self):
"""Test setting the learning rate as iterator."""

def learning_rate():
power = 0.6
constant_coeff = 0.1

def powerlaw():
n = 0
while True:
yield constant_coeff * (n ** power)
n += 1

return powerlaw()

def objective(x):
return (np.linalg.norm(x) - 1) ** 2

def grad(x):
return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

initial_point = np.array([1, 0.5, -2])

optimizer = GradientDescent(maxiter=20, learning_rate=learning_rate)
_, fx_opt, _ = optimizer.optimize(
initial_point.size, objective, gradient_function=grad, initial_point=initial_point
)

self.assertLess(fx_opt, 1e-5)
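
As a side note on the iterator test above (not part of the commit), the power-law schedule can be inspected on its own. Because ``n`` starts at 0, the first yielded rate is 0.0, so the very first update leaves the parameters unchanged, and the rate then grows as ``n ** 0.6``.

from itertools import islice

def learning_rate(power=0.6, constant_coeff=0.1):
    def powerlaw():
        n = 0
        while True:
            yield constant_coeff * (n ** power)
            n += 1
    return powerlaw()

print([round(r, 3) for r in islice(learning_rate(), 5)])
# approximately [0.0, 0.1, 0.152, 0.193, 0.23]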
19 changes: 15 additions & 4 deletions test/optimizers/test_optimizers.py
@@ -14,15 +14,15 @@

import unittest
from test.python.algorithms import QiskitAlgorithmsTestCase

import numpy as np
from scipy.optimize import rosen
from scipy.optimize import rosen, rosen_der

from qiskit.algorithms.optimizers import (
ADAM,
CG,
COBYLA,
GSLS,
GradientDescent,
L_BFGS_B,
NELDER_MEAD,
P_BFGS,
@@ -42,9 +42,14 @@ def setUp(self):
super().setUp()
algorithm_globals.random_seed = 52

def _optimize(self, optimizer):
def _optimize(self, optimizer, grad=False):
x_0 = [1.3, 0.7, 0.8, 1.9, 1.2]
res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
if grad:
res = optimizer.optimize(
len(x_0), rosen, gradient_function=rosen_der, initial_point=x_0
)
else:
res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
np.testing.assert_array_almost_equal(res[0], [1.0] * len(x_0), decimal=2)
return res

@@ -60,6 +65,12 @@ def test_cg(self):
res = self._optimize(optimizer)
self.assertLessEqual(res[2], 10000)

def test_gradient_descent(self):
"""cg test"""
optimizer = GradientDescent(maxiter=100000, tol=1e-06, learning_rate=1e-3)
res = self._optimize(optimizer, grad=True)
self.assertLessEqual(res[2], 100000)

def test_cobyla(self):
"""cobyla test"""
optimizer = COBYLA(maxiter=100000, tol=1e-06)
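
Outside the unittest harness, the new Rosenbrock case added above amounts to the following standalone run (illustrative only; the test checks that the minimizer reaches [1, 1, 1, 1, 1] to two decimals and uses at most 100000 evaluations).

import numpy as np
from scipy.optimize import rosen, rosen_der
from qiskit.algorithms.optimizers import GradientDescent

x_0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
optimizer = GradientDescent(maxiter=100000, tol=1e-06, learning_rate=1e-3)
x_opt, fx_opt, nfevs = optimizer.optimize(
    len(x_0), rosen, gradient_function=rosen_der, initial_point=x_0
)
print(x_opt, fx_opt, nfevs)  # x_opt should approach [1, 1, 1, 1, 1]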
