import numpy as np
import scipy.sparse as sparse
import scs
import warnings

import diffcp
import diffcp.cones as cone_lib
from diffcp.cone_program import pi, SolverError
import _diffcp

# `solve_and_derivative_internal` copied from `cone_program.py`,
# with `DT` edited to also return the linear system solution `r := -g` and
# the cone projection `Pi(z/|w|)` (with `w = 1`)


def solve_and_derivative_internal_EDITED(A, b, c, cone_dict, warm_start=None,
                                         mode='lsqr', raise_on_error=True,
                                         **kwargs):
    if mode not in ["dense", "lsqr"]:
        raise ValueError("Unsupported mode {}; the supported modes are "
                         "'dense' and 'lsqr'".format(mode))

    if np.isnan(A.data).any():
        raise RuntimeError("Found a NaN in A.")

    # set explicit 0s in A to np.nan
    A.data[A.data == 0] = np.nan

    # compute rows and cols of nonzeros in A
    rows, cols = A.nonzero()

    # reset np.nan entries in A to 0.0
    A.data[np.isnan(A.data)] = 0.0

    # eliminate explicit zeros in A, we no longer need them
    A.eliminate_zeros()

    data = {
        "A": A,
        "b": b,
        "c": c
    }

    if warm_start is not None:
        data["x"] = warm_start[0]
        data["y"] = warm_start[1]
        data["s"] = warm_start[2]

    kwargs.setdefault("verbose", False)
    result = scs.solve(data, cone_dict, **kwargs)

    status = result["info"]["status"]
    if status == "Solved/Inaccurate" and "acceleration_lookback" not in kwargs:
        # anderson acceleration is sometimes unstable
        result = scs.solve(data, cone_dict, acceleration_lookback=0, **kwargs)
        status = result["info"]["status"]

    if status == "Solved/Inaccurate":
        warnings.warn("Solved/Inaccurate.")
    elif status != "Solved":
        if raise_on_error:
            raise SolverError("Solver scs returned status %s" % status)
        else:
            result["D"] = None
            result["DT"] = None
            return result

    x = result["x"]
    y = result["y"]
    s = result["s"]

    # pre-compute quantities for the derivative
    m, n = A.shape
    N = m + n + 1
    cones = cone_lib.parse_cone_dict(cone_dict)
    cones_parsed = cone_lib.parse_cone_dict_cpp(cones)
    z = (x, y - s, np.array([1]))
    u, v, w = z

    Q = sparse.bmat([
        [None, A.T, np.expand_dims(c, - 1)],
        [-A, None, np.expand_dims(b, -1)],
        [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None]
    ])

    D_proj_dual_cone = _diffcp.dprojection(v, cones_parsed, True)
    if mode == "dense":
        Q_dense = Q.todense()
        M = _diffcp.M_dense(Q_dense, cones_parsed, u, v, w)
        MT = M.T
    else:
        M = _diffcp.M_operator(Q, cones_parsed, u, v, w)
        MT = M.transpose()

    pi_z = pi(z, cones)

    def derivative(dA, db, dc, **kwargs):
        """Applies derivative at (A, b, c) to perturbations dA, db, dc
        Args:
            dA: SciPy sparse matrix in CSC format; must have same sparsity
                pattern as the matrix `A` from the cone program
            db: NumPy array representing perturbation in `b`
            dc: NumPy array representing perturbation in `c`
        Returns:
           NumPy arrays dx, dy, ds, the result of applying the derivative
           to the perturbations.
        """
        dQ = sparse.bmat([
            [None, dA.T, np.expand_dims(dc, - 1)],
            [-dA, None, np.expand_dims(db, -1)],
            [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None]
        ])
        rhs = dQ @ pi_z
        if np.allclose(rhs, 0):
            dz = np.zeros(rhs.size)
        elif mode == "dense":
            dz = _diffcp._solve_derivative_dense(M, MT, rhs)
        else:
            dz = _diffcp.lsqr(M, rhs).solution

        du, dv, dw = np.split(dz, [n, n + m])
        dx = du - x * dw
        dy = D_proj_dual_cone.matvec(dv) - y * dw
        ds = D_proj_dual_cone.matvec(dv) - dv - s * dw
        return -dx, -dy, -ds

    def adjoint_derivative(dx, dy, ds, **kwargs):
        """Applies adjoint of derivative at (A, b, c) to perturbations dx, dy, ds
        Args:
            dx: NumPy array representing perturbation in `x`
            dy: NumPy array representing perturbation in `y`
            ds: NumPy array representing perturbation in `s`
        Returns:
            (`dA`, `db`, `dc`), the result of applying the adjoint to the
            perturbations; the sparsity pattern of `dA` matches that of `A`.
        """
        dw = -(x @ dx + y @ dy + s @ ds)
        dz = np.concatenate(
            [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])])

        if np.allclose(dz, 0):
            r = np.zeros(dz.shape)
        elif mode == "dense":
            r = _diffcp._solve_adjoint_derivative_dense(M, MT, dz)
        else:
            r = _diffcp.lsqr(MT, dz).solution

        values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols]
        dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape)
        db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m]
        dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n]

        return dA, db, dc, r, pi_z

    result["D"] = derivative
    result["DT"] = adjoint_derivative
    return result


# copied from diffcp examples
def random_cone_prog(m, n, cone_dict):
    """Returns the problem data of a random cone program."""
    cone_list = diffcp.cones.parse_cone_dict(cone_dict)
    z = np.random.randn(m)
    s_star = diffcp.cones.pi(z, cone_list, dual=False)
    y_star = s_star - z
    A = sparse.csc_matrix(np.random.randn(m, n))
    x_star = np.random.randn(n)
    b = A @ x_star + s_star
    c = -A.T @ y_star
    return A, b, c


if __name__ == "__main__":
    np.random.seed(0)
    cone_dict = {
        diffcp.ZERO: 3,
        diffcp.POS: 3,
        diffcp.SOC: [5]
    }
    m = 3 + 3 + 5
    n = 5
    A, b, c = random_cone_prog(m, n, cone_dict)
    result = solve_and_derivative_internal_EDITED(A, b, c, cone_dict)
    DT = result['DT']

    # evaluate the adjoint of the derivative
    dx = c
    dy = np.zeros(m)
    ds = np.zeros(m)
    dA, db, dc, r, pi_z = DT(dx, dy, ds)

    # form `dQ` explicitly and compare
    dQ = np.outer(-r, pi_z)  # `g = -r`
    dQ_12 = dQ[:n, n:n+m]
    dQ_21 = dQ[n:n+m, :n]
    with np.printoptions(precision=4, suppress=True, linewidth=50):
        print("-dQ_12.T + dQ_21 =")
        print(-dQ_12.T + dQ_21)
        print()
        print("dA from solve_and_derivative_internal =")
        print(dA.todense())