Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plain Gradient Descent optimizer #6459

Merged
merged 14 commits into from
Jun 1, 2021
Merged
3 changes: 3 additions & 0 deletions qiskit/algorithms/optimizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
COBYLA
L_BFGS_B
GSLS
GradientDescent
NELDER_MEAD
NFT
P_BFGS
Expand Down Expand Up @@ -104,6 +105,7 @@
from .cg import CG
from .cobyla import COBYLA
from .gsls import GSLS
from .gradient_descent import GradientDescent
from .imfil import IMFIL
from .l_bfgs_b import L_BFGS_B
from .nelder_mead import NELDER_MEAD
Expand Down Expand Up @@ -131,6 +133,7 @@
"CG",
"COBYLA",
"GSLS",
"GradientDescent",
"L_BFGS_B",
"NELDER_MEAD",
"NFT",
Expand Down
189 changes: 189 additions & 0 deletions qiskit/algorithms/optimizers/gradient_descent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""A standard gradient descent optimizer."""

from typing import Iterator, Optional, Union, Callable
from functools import partial

import numpy as np

from .optimizer import Optimizer, OptimizerSupportLevel

CALLBACK = Callable[[int, np.ndarray, float, float], None]


class GradientDescent(Optimizer):
r"""The gradient descent minimization routine.

For a function :math:`f` and an initial point :math:`\vec\theta_0`, the standard (or "vanilla")
gradient descent method is an iterative scheme to find the minimum :math:`\vec\theta^*` of
:math:`f` by updating the parameters in the direction of the negative gradient of :math:`f`

.. math::

\vec\theta_{n+1} = \vec\theta_{n} - \vec\eta\nabla f(\vec\theta_{n}),

for a small learning rate :math:`\eta > 0`.

You can either provide the analytic gradient :math:`\vec\nabla f` as ``gradient_function``
in the ``optimize`` method, or, if you do not provide it, use a finite difference approximation
of the gradient. To adapt the size of the perturbation in the finite difference gradients,
set the ``perturbation`` property in the initializer.

This optimizer supports a callback function. If provided in the initializer, the optimizer
will call the callback in each iteration with the following information in this order:
current number of function values, current parameters, current function value, norm of current
gradient.

Examples:

A minimum example that will use finite difference gradients with a default perturbation
of 0.01 and a default learning rate of 0.01.

.. code-block::python

from qiskit.algorithms.optimizers import GradientDescent

def f(x):
return (np.linalg.norm(x) - 1) ** 2

initial_point = np.array([1, 0.5, -0.2])

optimizer = GradientDescent(maxiter=100)
x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
f,
initial_point=initial_point)

print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")

An example where the learning rate is an iterator and we supply the analytic gradient.
Note how much faster this convergences (i.e. less ``nfevs``) compared to the previous
example.

.. code-block::python

from qiskit.algorithms.optimizers import GradientDescent

def learning_rate():
power = 0.6
constant_coeff = 0.1

def powerlaw():
n = 0
while True:
yield constant_coeff * (n ** power)
n += 1

return powerlaw()

def f(x):
return (np.linalg.norm(x) - 1) ** 2

def grad_f(x):
return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

initial_point = np.array([1, 0.5, -0.2])

optimizer = GradientDescent(maxiter=100, learning_rate=learning_rate)
x_opt, fx_opt, nfevs = optimizer.optimize(initial_point.size,
f,
gradient_function=grad_f,
initial_point=initial_point)

print(f"Found minimum {x_opt} at a value of {fx_opt} using {nfevs} evaluations.")

"""

def __init__(
self,
maxiter: int = 100,
learning_rate: Union[float, Callable[[], Iterator]] = 0.01,
tol: float = 1e-7,
callback: Optional[CALLBACK] = None,
perturbation: Optional[float] = None,
) -> None:
r"""
Args:
maxiter: The maximum number of iterations.
learning_rate: A constant or generator yielding learning rates for the parameter
updates. See the docstring for an example.
tol: If the norm of the parameter update is smaller than this threshold, the
optimizer is converged.
perturbation: If no gradient is passed to ``GradientDescent.optimize`` the gradient is
approximated with a symmetric finite difference scheme with ``perturbation``
perturbation in both directions (defaults to 1e-2 if required).
Ignored if a gradient callable is passed to ``GradientDescent.optimize``.
"""
super().__init__()

self.maxiter = maxiter
self.learning_rate = learning_rate
self.perturbation = perturbation
self.tol = tol
self.callback = callback

def _minimize(self, loss, grad, initial_point):
# set learning rate
if isinstance(self.learning_rate, float):
eta = constant(self.learning_rate)
else:
eta = self.learning_rate()

if grad is None:
eps = 0.01 if self.perturbation is None else self.perturbation
grad = partial(
Optimizer.gradient_num_diff,
f=loss,
epsilon=eps,
max_evals_grouped=self._max_evals_grouped,
)

# prepare some initials
x = np.asarray(initial_point)
nfevs = 0

for _ in range(1, self.maxiter + 1):
# compute update -- gradient evaluation counts as one function evaluation
update = grad(x)
nfevs += 1

# compute next parameter value
x_next = x - next(eta) * update

# send information to callback
stepsize = np.linalg.norm(update)
if self.callback is not None:
self.callback(nfevs, x_next, loss(x_next), stepsize)

# update parameters
x = x_next

# check termination
if stepsize < self.tol:
break

return x, loss(x), nfevs

def get_support_level(self):
"""Get the support level dictionary."""
return {
"gradient": OptimizerSupportLevel.supported,
"bounds": OptimizerSupportLevel.ignored,
"initial_point": OptimizerSupportLevel.required,
}

# pylint: disable=unused-argument
def optimize(
self,
num_vars,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not make num_vars optional, since it is unused? It is unused in the scipy optimizer, too (maybe others). Maybe fixing this in a uniform way is outside the scope of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent question, this is part of #6382 🙂

objective_function,
gradient_function=None,
variable_bounds=None,
initial_point=None,
):
return self._minimize(objective_function, gradient_function, initial_point)


def constant(eta=0.01):
"""Yield a constant."""

while True:
yield eta
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
features:
- |
Add the standard :class:`~qiskit.algorithms.optimizers.GradientDescent` optimizer
to the collection of optimizers in the `qiskit.algorithms.optimizers` module.
For a detailed description and examples on how to use this class, please refer
to the class docstring / online docs of the class.
116 changes: 116 additions & 0 deletions test/python/algorithms/optimizers/test_gradient_descent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Tests for the Gradient Descent optimizer."""

from test.python.algorithms import QiskitAlgorithmsTestCase

import numpy as np

from qiskit.algorithms.optimizers import GradientDescent
from qiskit.circuit.library import PauliTwoDesign
from qiskit.opflow import I, Z, StateFn
from qiskit.test.decorators import slow_test


class TestGradientDescent(QiskitAlgorithmsTestCase):
"""Tests for the SPSA optimizer."""

def setUp(self):
super().setUp()
np.random.seed(12)

@slow_test
def test_pauli_two_design(self):
"""Test SPSA on the Pauli two-design example."""
circuit = PauliTwoDesign(3, reps=3, seed=2)
parameters = list(circuit.parameters)
obs = Z ^ Z ^ I
expr = ~StateFn(obs) @ StateFn(circuit)

initial_point = np.array(
[
0.1822308,
-0.27254251,
0.83684425,
0.86153976,
-0.7111668,
0.82766631,
0.97867993,
0.46136964,
2.27079901,
0.13382699,
0.29589915,
0.64883193,
]
)

def objective(x):
return expr.bind_parameters(dict(zip(parameters, x))).eval().real

optimizer = GradientDescent(maxiter=100, learning_rate=0.1, perturbation=0.1)

result = optimizer.optimize(circuit.num_parameters, objective, initial_point=initial_point)

self.assertLess(result[1], -0.95) # final loss
self.assertEqual(result[2], 100) # function evaluations

def test_callback(self):
"""Test the callback."""

history = []

def callback(*args):
history.append(args)

optimizer = GradientDescent(maxiter=1, callback=callback)

def objective(x):
return np.linalg.norm(x)

_ = optimizer.optimize(2, objective, initial_point=np.array([1, -1]))

self.assertEqual(len(history), 1)
self.assertIsInstance(history[0][0], int) # nfevs
self.assertIsInstance(history[0][1], np.ndarray) # parameters
self.assertIsInstance(history[0][2], float) # function value
self.assertIsInstance(history[0][3], float) # norm of the gradient

def test_iterator_learning_rate(self):
"""Test setting the learning rate as iterator."""

def learning_rate():
power = 0.6
constant_coeff = 0.1

def powerlaw():
n = 0
while True:
yield constant_coeff * (n ** power)
n += 1

return powerlaw()

def objective(x):
return (np.linalg.norm(x) - 1) ** 2

def grad(x):
return 2 * (np.linalg.norm(x) - 1) * x / np.linalg.norm(x)

initial_point = np.array([1, 0.5, -2])

optimizer = GradientDescent(maxiter=20, learning_rate=learning_rate)
_, fx_opt, _ = optimizer.optimize(
initial_point.size, objective, gradient_function=grad, initial_point=initial_point
)

self.assertLess(fx_opt, 1e-5)
19 changes: 15 additions & 4 deletions test/python/algorithms/optimizers/test_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@

import unittest
from test.python.algorithms import QiskitAlgorithmsTestCase

import numpy as np
from scipy.optimize import rosen
from scipy.optimize import rosen, rosen_der

from qiskit.algorithms.optimizers import (
ADAM,
CG,
COBYLA,
GSLS,
GradientDescent,
L_BFGS_B,
NELDER_MEAD,
P_BFGS,
Expand All @@ -42,9 +42,14 @@ def setUp(self):
super().setUp()
algorithm_globals.random_seed = 52

def _optimize(self, optimizer):
def _optimize(self, optimizer, grad=False):
x_0 = [1.3, 0.7, 0.8, 1.9, 1.2]
res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
if grad:
res = optimizer.optimize(
len(x_0), rosen, gradient_function=rosen_der, initial_point=x_0
)
else:
res = optimizer.optimize(len(x_0), rosen, initial_point=x_0)
np.testing.assert_array_almost_equal(res[0], [1.0] * len(x_0), decimal=2)
return res

Expand All @@ -60,6 +65,12 @@ def test_cg(self):
res = self._optimize(optimizer)
self.assertLessEqual(res[2], 10000)

def test_gradient_descent(self):
"""cg test"""
optimizer = GradientDescent(maxiter=100000, tol=1e-06, learning_rate=1e-3)
res = self._optimize(optimizer, grad=True)
self.assertLessEqual(res[2], 100000)

def test_cobyla(self):
"""cobyla test"""
optimizer = COBYLA(maxiter=100000, tol=1e-06)
Expand Down