Merge pull request #633 from WeichenXu123/fix_lbfgsb_linesearch_out_of_bound

Fix the LBFGSB line search stepping out of bounds, and fix the findAlpha method
dlwh authored Apr 18, 2017
2 parents 5ae0579 + 8f1a5ec commit dc4e005
Showing 3 changed files with 91 additions and 18 deletions.
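
The heart of the change is visible in determineStepSize below: the Strong Wolfe line search is now given an upper bound equal to the largest step that keeps every coordinate of x + alpha * direction inside [lowerBounds, upperBounds], and iterates are clamped back into the box. A minimal standalone sketch of that step-bound computation (the object and method names here are illustrative, not part of Breeze):

```scala
import breeze.linalg.DenseVector

// Sketch of the idea behind the fix (illustrative, not the library code):
// given a search direction, compute the largest step that keeps every
// coordinate of x + alpha * d inside [lower, upper], then hand that to the
// line search as a cap instead of letting it step anywhere.
object StepBoundSketch {
  def maxFeasibleStep(x: DenseVector[Double],
                      d: DenseVector[Double],
                      lower: DenseVector[Double],
                      upper: DenseVector[Double]): Double = {
    var bound = Double.PositiveInfinity
    var i = 0
    while (i < x.length) {
      if (d(i) != 0.0) {
        // which bound we can hit depends on the sign of the direction
        val limit = if (d(i) < 0.0) lower(i) else upper(i)
        bound = math.min(bound, (limit - x(i)) / d(i))
      }
      i += 1
    }
    bound
  }

  def main(args: Array[String]): Unit = {
    val x = DenseVector(0.5, 0.5)
    val d = DenseVector(1.0, -2.0)
    val lower = DenseVector(0.0, 0.0)
    val upper = DenseVector(1.0, 1.0)
    // moving along d, coordinate 1 hits its lower bound first, at alpha = 0.25
    println(maxFeasibleStep(x, d, lower, upper)) // 0.25
  }
}
```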
62 changes: 50 additions & 12 deletions math/src/main/scala/breeze/optimize/LBFGSB.scala
@@ -17,9 +17,10 @@ package breeze.optimize

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.linalg._
import breeze.optimize.FirstOrderMinimizer.{State, ProjectedStepConverged, ConvergenceCheck}
import breeze.optimize.FirstOrderMinimizer.{ConvergenceCheck, ProjectedStepConverged, State}
import breeze.util.SerializableLogging
import breeze.util.Implicits._
import spire.syntax.cfor._

/**
* This algorithm is refered the paper
@@ -65,10 +66,12 @@ class LBFGSB(lowerBounds: DenseVector[Double],

//step2:compute the cauchy point by algorithm CP
val (cauchyPoint, c) = getGeneralizedCauchyPoint(state.history, x, g)
adjustWithinBound(cauchyPoint)

val dirk = if(0 == state.iter) cauchyPoint - x else {
//step3:compute a search direction d_k by the primal method
val subspaceMin = subspaceMinimization(state.history, cauchyPoint, x, c, g)
adjustWithinBound(subspaceMin)
subspaceMin - x
};

@@ -77,14 +80,48 @@ class LBFGSB(lowerBounds: DenseVector[Double],


override protected def determineStepSize(state: State, f: DiffFunction[DenseVector[Double]], direction: DenseVector[Double]): Double = {
val x = state.x
val ff = LineSearch.functionFromSearchDirection(f, x, direction)
val ff = new DiffFunction[Double] {
def calculate(alpha: Double) = {
val newX = takeStep(state, direction, alpha)
val (ff, grad) = f.calculate(newX)
ff -> (grad dot direction)
}
}
val wolfeRuleSearch = new StrongWolfeLineSearch(maxZoomIter, maxLineSearchIter) // TODO: Need good default values here.
wolfeRuleSearch.minimize(ff, 1.0)

var minStepBound = Double.PositiveInfinity
var i = 0
while (i < lowerBounds.length) {
val dir = direction(i)
if (dir != 0.0) {
val bound = if (dir < 0.0) lowerBounds(i) else upperBounds(i)
val stepBound = (bound - state.x(i)) / dir
assert(stepBound > 0.0)
if (stepBound < minStepBound) {
minStepBound = stepBound
}
}
i += 1
}

wolfeRuleSearch.minimizeWithBound(ff, 1.0, minStepBound)
}

override protected def takeStep(state: State, dir: DenseVector[Double], stepSize: Double) = {
state.x + (dir *:* stepSize)
val newX = state.x + (dir :* stepSize)
adjustWithinBound(newX)
newX
}

def adjustWithinBound(point: DenseVector[Double]): Unit = {
cforRange(0 until point.length) { i =>
if (point(i) > upperBounds(i)) {
point(i) = upperBounds(i)
}
if (point(i) < lowerBounds(i)) {
point(i) = lowerBounds(i)
}
}
}

private def initialize(f: DiffFunction[DenseVector[Double]], x0: DenseVector[Double]) = {
@@ -181,16 +218,17 @@ class LBFGSB(lowerBounds: DenseVector[Double],
* @param freeVarIndex
* @return starAlpha = max{a : a <= 1 and l_i-xc_i <= a*d_i <= u_i-xc_i}
*/
protected def findAlpha(xCauchy:DenseVector[Double], du:Vector[Double], freeVarIndex:Array[Int]) = {
protected def findAlpha(xCauchy: DenseVector[Double], du: Vector[Double],
freeVarIndex: Array[Int]) = {
var starAlpha = 1.0
for((vIdx, i) <- freeVarIndex.zipWithIndex) {
starAlpha = if (0 < du(i)) {
math.max(starAlpha, math.min(upperBounds(vIdx) - xCauchy(vIdx) / du(i), 1.0))
} else {
math.max(starAlpha, math.min(lowerBounds(vIdx) - xCauchy(vIdx) / du(i), 1.0))
for ((vIdx, i) <- freeVarIndex.zipWithIndex) {
if (0 < du(i)) {
starAlpha = math.min(starAlpha, (upperBounds(vIdx) - xCauchy(vIdx)) / du(i))
} else if (0 > du(i)) {
starAlpha = math.min(starAlpha, (lowerBounds(vIdx) - xCauchy(vIdx)) / du(i))
}
}

assert(starAlpha >= 0.0 && starAlpha <= 1.0)
starAlpha
}

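The corrected findAlpha above takes the minimum (rather than the maximum) of the per-variable ratios and parenthesizes the gap before dividing by du(i). A self-contained sketch of the same computation, with illustrative names and an example value:

```scala
import breeze.linalg.DenseVector

// Standalone sketch of the corrected findAlpha logic: find the largest alpha
// in [0, 1] such that xCauchy + alpha * du stays within [lower, upper] on the
// free variables. du is indexed by position in the free-variable list, the
// bounds and xCauchy by the variable's own index, as in the method above.
object FindAlphaSketch {
  def findAlpha(xCauchy: DenseVector[Double],
                du: DenseVector[Double],
                freeVarIndex: Array[Int],
                lower: DenseVector[Double],
                upper: DenseVector[Double]): Double = {
    var starAlpha = 1.0
    for ((vIdx, i) <- freeVarIndex.zipWithIndex) {
      if (du(i) > 0) {
        // note the parentheses: divide the whole gap (upper - xCauchy) by du
        starAlpha = math.min(starAlpha, (upper(vIdx) - xCauchy(vIdx)) / du(i))
      } else if (du(i) < 0) {
        starAlpha = math.min(starAlpha, (lower(vIdx) - xCauchy(vIdx)) / du(i))
      }
    }
    starAlpha
  }

  def main(args: Array[String]): Unit = {
    val xc = DenseVector(0.2, 0.9)
    val du = DenseVector(1.0, 1.0) // one entry per free variable
    val free = Array(0, 1)
    val lower = DenseVector(0.0, 0.0)
    val upper = DenseVector(1.0, 1.0)
    // variable 1 can only move 0.1 before hitting its upper bound
    println(findAlpha(xc, du, free, lower, upper)) // 0.1
  }
}
```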
28 changes: 22 additions & 6 deletions math/src/main/scala/breeze/optimize/StrongWolfe.scala
@@ -58,12 +58,19 @@ class StrongWolfeLineSearch(maxZoomIter: Int, maxLineSearchIter: Int) extends Cu
val c1 = 1e-4
val c2 = 0.9

def minimize(f: DiffFunction[Double], init: Double = 1.0): Double = {
minimizeWithBound(f, init = 1.0, bound = Double.PositiveInfinity)
}

/**
* Performs a line search on the function f, returning a point satisfying
* the Strong Wolfe conditions. Based on the line search detailed in
* Nocedal & Wright Numerical Optimization p58.
* Performs a line search on the function f with bound, returning a point satisfying
* the Strong Wolfe conditions OR satisfying sufficient decrease condition and hit bound.
* Based on the line search detailed in Nocedal & Wright Numerical Optimization p58.
* BUT add some modification for bound checking.
*/
def minimize(f: DiffFunction[Double], init: Double = 1.0):Double = {
def minimizeWithBound(f: DiffFunction[Double], init: Double = 1.0, bound: Double = 1.0): Double = {

require(init <= bound, "init value should <= bound")

def phi(t: Double): Bracket = {
val (pval, pdd) = f.calculate(t)
@@ -171,8 +178,17 @@ class StrongWolfeLineSearch(maxZoomIter: Int, maxLineSearchIter: Int) extends Cu
}

low = c
t *= 1.5
logger.debug("Sufficent Decrease condition but not curvature condition satisfied. Increased t to: " + t)
if (t == bound) {
logger.debug("Reach bound, satisfy sufficent decrease condition," +
" but not curvature condition satisfied.")
return bound
} else {
t *= 1.5
if (t > bound) {
t = bound
}
logger.debug("Sufficent Decrease condition but not curvature condition satisfied. Increased t to: " + t)
}
}
}

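Assuming StrongWolfeLineSearch and DiffFunction are importable from breeze.optimize, as the file paths suggest, a usage sketch of the new minimizeWithBound entry point might look like the following; the objective and the expected result are illustrative, not taken from the library's tests:

```scala
import breeze.optimize.{DiffFunction, StrongWolfeLineSearch}

object BoundedLineSearchSketch {
  def main(args: Array[String]): Unit = {
    // phi(t) = -t decreases forever, so the sufficient-decrease condition holds
    // for every t > 0 while the curvature condition (|phi'(t)| <= 0.9 * |phi'(0)|)
    // never does; an unbounded search would just keep growing the step.
    val phi = new DiffFunction[Double] {
      def calculate(t: Double): (Double, Double) = (-t, -1.0)
    }
    val search = new StrongWolfeLineSearch(maxZoomIter = 10, maxLineSearchIter = 10)
    val t = search.minimizeWithBound(phi, init = 1.0, bound = 2.0)
    // under the capped-growth logic above, t should come back as the bound itself
    println(t) // expected: 2.0
  }
}
```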
19 changes: 19 additions & 0 deletions math/src/test/scala/breeze/optimize/LBFGSBTest.scala
@@ -97,4 +97,23 @@ class LBFGSBTest extends OptimizeTestBase{
assert(ures.value < res.value)
}

test("issue 572") {
val solver = new LBFGSB(DenseVector[Double](1E-12), DenseVector[Double](Double.MaxValue))

val f = new DiffFunction[DenseVector[Double]] {
override def calculate(x: DenseVector[Double]): (Double, DenseVector[Double]) = {
val cost = x(0) + 1.0/x(0)
val grad = DenseVector(1.0 - 1.0/(x(0)*x(0)))
(cost, grad)
}
}

val nearX0 = DenseVector[Double](1.5)
val nearRes = solver.minimizeAndReturnState(f, nearX0)
assert(abs(nearRes.x(0) - 1.0) < EPS)

val farX0 = DenseVector[Double](1500)
val farRes = solver.minimizeAndReturnState(f, farX0)
assert(abs(farRes.x(0) - 1.0) < EPS)
}
}
