import numpy as np import scipy.sparse as sparse import scs import warnings import diffcp import diffcp.cones as cone_lib from diffcp.cone_program import pi, SolverError import _diffcp # `solve_and_derivative_internal` copied from `cone_program.py`, # with `DT` edited to also return the linear system solution `r := -g` and # the cone projection `Pi(z/|w|)` (with `w = 1`) def solve_and_derivative_internal_EDITED(A, b, c, cone_dict, warm_start=None, mode='lsqr', raise_on_error=True, **kwargs): if mode not in ["dense", "lsqr"]: raise ValueError("Unsupported mode {}; the supported modes are " "'dense' and 'lsqr'".format(mode)) if np.isnan(A.data).any(): raise RuntimeError("Found a NaN in A.") # set explicit 0s in A to np.nan A.data[A.data == 0] = np.nan # compute rows and cols of nonzeros in A rows, cols = A.nonzero() # reset np.nan entries in A to 0.0 A.data[np.isnan(A.data)] = 0.0 # eliminate explicit zeros in A, we no longer need them A.eliminate_zeros() data = { "A": A, "b": b, "c": c } if warm_start is not None: data["x"] = warm_start[0] data["y"] = warm_start[1] data["s"] = warm_start[2] kwargs.setdefault("verbose", False) result = scs.solve(data, cone_dict, **kwargs) status = result["info"]["status"] if status == "Solved/Inaccurate" and "acceleration_lookback" not in kwargs: # anderson acceleration is sometimes unstable result = scs.solve(data, cone_dict, acceleration_lookback=0, **kwargs) status = result["info"]["status"] if status == "Solved/Inaccurate": warnings.warn("Solved/Inaccurate.") elif status != "Solved": if raise_on_error: raise SolverError("Solver scs returned status %s" % status) else: result["D"] = None result["DT"] = None return result x = result["x"] y = result["y"] s = result["s"] # pre-compute quantities for the derivative m, n = A.shape N = m + n + 1 cones = cone_lib.parse_cone_dict(cone_dict) cones_parsed = cone_lib.parse_cone_dict_cpp(cones) z = (x, y - s, np.array([1])) u, v, w = z Q = sparse.bmat([ [None, A.T, np.expand_dims(c, - 1)], [-A, None, np.expand_dims(b, -1)], [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None] ]) D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True) if mode == "dense": Q_dense = Q.todense() M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w) MT = M.T else: M = _diffcp.M_operator(Q, cones_parsed, u, v, w) MT = M.transpose() pi_z = pi(z, cones) def derivative(dA, db, dc, **kwargs): """Applies derivative at (A, b, c) to perturbations dA, db, dc Args: dA: SciPy sparse matrix in CSC format; must have same sparsity pattern as the matrix `A` from the cone program db: NumPy array representing perturbation in `b` dc: NumPy array representing perturbation in `c` Returns: NumPy arrays dx, dy, ds, the result of applying the derivative to the perturbations. """ dQ = sparse.bmat([ [None, dA.T, np.expand_dims(dc, - 1)], [-dA, None, np.expand_dims(db, -1)], [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None] ]) rhs = dQ @ pi_z if np.allclose(rhs, 0): dz = np.zeros(rhs.size) elif mode == "dense": dz = _diffcp._solve_derivative_dense(M, MT, rhs) else: dz = _diffcp.lsqr(M, rhs).solution du, dv, dw = np.split(dz, [n, n + m]) dx = du - x * dw dy = D_proj_dual_cone.matvec(dv) - y * dw ds = D_proj_dual_cone.matvec(dv) - dv - s * dw return -dx, -dy, -ds def adjoint_derivative(dx, dy, ds, **kwargs): """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds Args: dx: NumPy array representing perturbation in `x` dy: NumPy array representing perturbation in `y` ds: NumPy array representing perturbation in `s` Returns: (`dA`, `db`, `dc`), the result of applying the adjoint to the perturbations; the sparsity pattern of `dA` matches that of `A`. """ dw = -(x @ dx + y @ dy + s @ ds) dz = np.concatenate( [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])]) if np.allclose(dz, 0): r = np.zeros(dz.shape) elif mode == "dense": r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz) else: r = _diffcp.lsqr(MT, dz).solution values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols] dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape) db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m] dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n] return dA, db, dc, r, pi_z result["D"] = derivative result["DT"] = adjoint_derivative return result # copied from diffcp examples def random_cone_prog(m, n, cone_dict): """Returns the problem data of a random cone program.""" cone_list = diffcp.cones.parse_cone_dict(cone_dict) z = np.random.randn(m) s_star = diffcp.cones.pi(z, cone_list, dual=False) y_star = s_star - z A = sparse.csc_matrix(np.random.randn(m, n)) x_star = np.random.randn(n) b = A @ x_star + s_star c = -A.T @ y_star return A, b, c if __name__ == "__main__": np.random.seed(0) cone_dict = { diffcp.ZERO: 3, diffcp.POS: 3, diffcp.SOC: [5] } m = 3 + 3 + 5 n = 5 A, b, c = random_cone_prog(m, n, cone_dict) result = solve_and_derivative_internal_EDITED(A, b, c, cone_dict) DT = result['DT'] # evaluate the adjoint of the derivative dx = c dy = np.zeros(m) ds = np.zeros(m) dA, db, dc, r, pi_z = DT(dx, dy, ds) # form `dQ` explicitly and compare dQ = np.outer(-r, pi_z) # `g = -r` dQ_12 = dQ[:n, n:n+m] dQ_21 = dQ[n:n+m, :n] with np.printoptions(precision=4, suppress=True, linewidth=50): print("-dQ_12.T + dQ_21 =") print(-dQ_12.T + dQ_21) print() print("dA from solve_and_derivative_internal =") print(dA.todense())