From 19af82a6fcea7c20ca1d26c6c898ed4684eede8a Mon Sep 17 00:00:00 2001
From: Liangfu Chen
Date: Wed, 20 Jun 2018 22:11:16 +0800
Subject: [PATCH 01/36] [SPARSE] adjust implementation according to suggestions;

---
 python/tvm/contrib/sparse.py        | 254 ++++++++++++++++++++++++++++
 tests/python/contrib/test_sparse.py |  26 +++
 2 files changed, 280 insertions(+)
 create mode 100644 python/tvm/contrib/sparse.py
 create mode 100644 tests/python/contrib/test_sparse.py

diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py
new file mode 100644
index 000000000000..6197b5aa3347
--- /dev/null
+++ b/python/tvm/contrib/sparse.py
@@ -0,0 +1,254 @@
+"""Tensor and Operation class for computation declaration."""
+# pylint: disable=invalid-name
+from __future__ import absolute_import as _abs
+from .._ffi.node import NodeBase, NodeGeneric, register_node, convert_to_node
+from .. import _api_internal
+from .. import make as _make
+from .. import expr as _expr
+from .. import api as _api
+from .. import tag as _tag
+
+class TensorSlice(NodeGeneric, _expr.ExprOp):
+    """Auxiliary data structure for enabling slicing syntax on tensors."""
+    def __init__(self, tensor, indices):
+        if not isinstance(indices, tuple):
+            indices = (indices,)
+        self.tensor = tensor
+        self.indices = indices
+
+    def __getitem__(self, indices):
+        if not isinstance(indices, tuple):
+            indices = (indices,)
+        return TensorSlice(self.tensor, self.indices + indices)
+
+    def asnode(self):
+        """Convert slice to node."""
+        return self.tensor(*self.indices)
+
+    @property
+    def dtype(self):
+        """Data type of the tensor."""
+        return self.tensor.dtype
+
+# stand-in for the IterVar class; not bound in this module yet
+iter_var_cls = None
+
+@register_node
+class CSRTensor(NodeBase, _expr.ExprOp):
+    """Tensor object, to construct, see function.Tensor"""
+    def __call__(self, *indices):
+        ndim = self.ndim
+        if len(indices) != ndim:
+            raise ValueError("Need to provide %d indices in tensor slice" % ndim)
+        indices = convert_to_node(indices)
+        args = []
+        for x in indices:
+            if isinstance(x, _expr.Expr):
+                args.append(x)
+            elif isinstance(x, iter_var_cls):
+                args.append(x.var)
+            else:
+                raise ValueError("The indices must be expressions")
+
+        return _make.Call(self.dtype, self.op.name,
+                          args, _expr.Call.Halide,
+                          self.op, self.value_index)
+
+    def __getitem__(self, indices):
+        return TensorSlice(self, indices)
+
+    def __hash__(self):
+        return _api_internal._TensorHash(self)
+
+    def __eq__(self, other):
+        if not isinstance(other, CSRTensor):
+            if isinstance(other, _expr.ExprOp):
+                return _expr.EqualOp(self, other)
+            return False
+        if self.ndim == 0 and other.ndim == 0:
+            raise ValueError("Equal == comparison among rank-0 tensors is ambiguous, "
+                             "use Tensor.equal for content expression equivalence, "
+                             "use Tensor.same_as for exact reference comparison")
+        return _api_internal._TensorEqual(self, other)
+
+    @property
+    def ndim(self):
+        """Dimension of the tensor."""
+        return len(self.shape)
+
+    @property
+    def axis(self):
+        """Axis of the tensor."""
+        return self.__getattr__("axis")
+
+    @property
+    def op(self):
+        """The corresponding :any:`Operation`."""
+        return self.__getattr__("op")
+
+    @property
+    def value_index(self):
+        """The output value index the tensor corresponds to."""
+        return self.__getattr__("value_index")
+
+    @property
+    def shape(self):
+        """The output shape of the tensor."""
+        return self.__getattr__("shape")
+
+    @property
+    def name(self):
+        op = self.op
+        if op.num_outputs == 1:
+            return op.name
+        return "%s.v%d" % (op.name, self.value_index)
+
+
+class Operation(NodeBase):
+    """Represent an operation that generates a tensor"""
+    
def output(self, index): + """Get the index-th output of the operation + + Parameters + ---------- + index : int + The index size. + + Returns + ------- + out : Tensor + The i-th output. + """ + return _api_internal._OpGetOutput(self, index) + + @property + def num_outputs(self): + """Number of outputs of this op.""" + return _api_internal._OpNumOutputs(self) + + @property + def input_tensors(self): + """List of input tensors to this op.""" + return _api_internal._OpInputTensors(self) + + +@register_node +class PlaceholderOp(Operation): + """Placeholder operation.""" + pass + + +@register_node +class ComputeOp(Operation): + """Compute operation.""" + @property + def axis(self): + """Represent axis of IterVar, only defined when it is a ComputeOp""" + return self.__getattr__("axis") + + @property + def reduce_axis(self): + """Represent axis of reductions, only defined when it is a ComputeOp""" + return self.__getattr__("reduce_axis") + + +@register_node +class ScanOp(Operation): + """Scan operation.""" + @property + def scan_axis(self): + """Represent axis of scan, only defined when it is a ScanOp""" + return self.__getattr__("scan_axis") + + +@register_node +class ExternOp(Operation): + """Extern operation.""" + pass + +float32 = "float32" +csr = "csr" + +class Placeholder(object): + def __init__(self, shape, dtype, name, stype): + super(Placeholder, self).__init__() + self.shape = shape + self.dtype = dtype + self.name = name + self.stype = stype + shape = (0,) + self.data = _api.placeholder(shape, dtype, name+'_data') + self.indices = _api.placeholder(shape, dtype, name+'_indices') + self.indptr = _api.placeholder(shape, dtype, name+'_indptr') + +def placeholder(shape, dtype=None, name="placeholder", stype=None): + """Construct an empty tensor object. + + Parameters + ---------- + shape: Tuple of Expr + The shape of the tensor + + dtype: str, optional + The data type of the tensor + + name: str, optional + The name hint of the tensor + + Returns + ------- + tensor: Tensor + The created tensor + """ + shape = (shape,) if isinstance(shape, _expr.Expr) else shape + dtype = float32 if dtype is None else dtype + stype = csr if stype is None else stype + return Placeholder(shape, dtype, name, stype) + +def compute(shape, fcompute, name="compute", tag=""): + """Construct a new tensor by computing over the shape domain. 
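+    For example, ``compute((m, n), lambda i, j: A[i, j] + 1)`` declares an
+    elementwise increment over a 2-D ``m x n`` domain (``A`` here being any
+    indexable tensor).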
+
+    The compute rule is result[axis] = fcompute(axis)
+
+    Parameters
+    ----------
+    shape: Tuple of Expr
+        The shape of the tensor
+
+    fcompute: lambda function of indices -> value
+        Specifies the input source expression
+
+    name: str, optional
+        The name hint of the tensor
+
+    Returns
+    -------
+    tensor: Tensor
+        The created tensor
+    """
+    if _tag.TagScope.current is not None:
+        if tag != "":
+            raise ValueError("nested tag is not allowed for now")
+        tag = _tag.TagScope.current.tag
+    shape = (shape,) if isinstance(shape, _expr.Expr) else shape
+    ndim = len(shape)
+    code = fcompute.__code__
+
+    if fcompute.__code__.co_argcount == 0:
+        arg_names = ["i%d" % i for i in range(ndim)]
+    else:
+        arg_names = code.co_varnames[:code.co_argcount]
+
+    if ndim != len(arg_names):
+        raise ValueError("fcompute does not match dimension, ndim=%d" % ndim)
+
+    # _IterVar and convert are helpers in tvm.api, imported above as _api
+    dim_var = [_api._IterVar((0, s), x, 0) for x, s in zip(arg_names, shape)]
+    body = fcompute(*[v.var for v in dim_var])
+    if not isinstance(body, (list, tuple)):
+        body = [body]
+    body = _api.convert(body)
+    op_node = _api_internal._ComputeOp(
+        name, tag, dim_var, body)
+    num = op_node.num_outputs
+    outputs = tuple(op_node.output(i) for i in range(num))
+    return outputs[0] if num == 1 else outputs
+
+
diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py
new file mode 100644
index 000000000000..2fb5b334a4bb
--- /dev/null
+++ b/tests/python/contrib/test_sparse.py
@@ -0,0 +1,26 @@
+import os, sys
+thisdir = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.join(thisdir, '../../../python'))
+
+import tvm
+import tvm.contrib.sparse as tvmsp
+
+def test_tensor():
+    dtype = 'float32'
+    stype = 'csr'
+    m = tvm.var('m')
+    n = tvm.var('n')
+    l = tvm.var('l')
+    A = tvmsp.placeholder((m, ), name='A', stype=stype, dtype=dtype)
+    B = tvmsp.placeholder((n, ), name='B', stype=stype, dtype=dtype)
+    print(A)
+    assert(A.stype == 'csr')
+    assert(B.stype == 'csr')
+    assert(A.data.shape == (0,))
+    assert(A.indices.shape == (0,))
+    assert(A.indptr.shape == (0,))
+    # T = tvmsp.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k])
+
+if __name__ == "__main__":
+    test_tensor()
+
From 6030a640557f6f7e0be6618df36ce418be0d2dcc Mon Sep 17 00:00:00 2001
From: Liangfu Chen
Date: Wed, 20 Jun 2018 22:55:13 +0800
Subject: [PATCH 02/36] fix pylint;

---
 python/tvm/contrib/sparse.py        | 23 +++++++++++++++++++----
 tests/python/contrib/test_sparse.py |  8 +++-----
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py
index 6197b5aa3347..0cb73d559944 100644
--- a/python/tvm/contrib/sparse.py
+++ b/python/tvm/contrib/sparse.py
@@ -168,7 +168,24 @@ class ExternOp(Operation):
 csr = "csr"
 
 class Placeholder(object):
+    """Placeholder class for csr based sparse tensor representation."""
     def __init__(self, shape, dtype, name, stype):
+        """Construct a bare-bones structure for a csr_matrix
+
+        Parameters
+        ----------
+        shape: Tuple of Expr
+            The shape of the tensor
+
+        dtype: str, optional
+            The data type of the tensor
+
+        name: str, optional
+            The name hint of the tensor
+
+        stype: str, optional
+            The storage type of the tensor
+        """
         super(Placeholder, self).__init__()
         self.shape = shape
         self.dtype = dtype
@@ -176,8 +193,8 @@ def __init__(self, shape, dtype, name, stype):
         self.stype = stype
         shape = (0,)
         self.data = _api.placeholder(shape, dtype, name+'_data')
-        self.indices = _api.placeholder(shape, dtype, name+'_indices')
-        self.indptr = _api.placeholder(shape, dtype, name+'_indptr')
+        
self.indices = _api.placeholder(shape, 'int32', name+'_indices') + self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') def placeholder(shape, dtype=None, name="placeholder", stype=None): """Construct an empty tensor object. @@ -250,5 +267,3 @@ def compute(shape, fcompute, name="compute", tag=""): num = op_node.num_outputs outputs = tuple(op_node.output(i) for i in range(num)) return outputs[0] if num == 1 else outputs - - diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 2fb5b334a4bb..be68745131a9 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -13,13 +13,11 @@ def test_tensor(): l = tvm.var('l') A = tvmsp.placeholder((m, ), name='A', stype=stype, dtype=dtype) B = tvmsp.placeholder((n, ), name='B', stype=stype, dtype=dtype) - print(A) + print(vars(A)) assert(A.stype == 'csr') assert(B.stype == 'csr') - assert(A.data.shape == (0,)) - assert(A.indices.shape == (0,)) - assert(A.indptr.shape == (0,)) - # T = tvmsp.compute((m, n, l), lambda i, j, k: A[i, k] * B[j, k]) + shape = [0] + assert(str(A.data.shape) == str(shape)) if __name__ == "__main__": test_tensor() From 77329467eff1569b7a5550ace9f4f17f610a6180 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 20 Jun 2018 23:36:06 +0800 Subject: [PATCH 03/36] derive from PlaceholderOp; --- python/tvm/contrib/sparse.py | 5 +++-- tests/python/contrib/test_sparse.py | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 0cb73d559944..51d1e3c8571f 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -167,7 +167,7 @@ class ExternOp(Operation): float32 = "float32" csr = "csr" -class Placeholder(object): +class Placeholder(PlaceholderOp): """Placeholder class for csr based sparse tensor representation.""" def __init__(self, shape, dtype, name, stype): """Contructing a bare bone structure for a csr_matrix @@ -186,7 +186,7 @@ def __init__(self, shape, dtype, name, stype): stype: str, optional The storage type of the tensor """ - super(Placeholder, self).__init__() + super(Placeholder, self).__init__(self) self.shape = shape self.dtype = dtype self.name = name @@ -267,3 +267,4 @@ def compute(shape, fcompute, name="compute", tag=""): num = op_node.num_outputs outputs = tuple(op_node.output(i) for i in range(num)) return outputs[0] if num == 1 else outputs + diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index be68745131a9..7db0be49d508 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -9,10 +9,8 @@ def test_tensor(): dtype = 'float32' stype = 'csr' m = tvm.var('m') - n = tvm.var('n') - l = tvm.var('l') A = tvmsp.placeholder((m, ), name='A', stype=stype, dtype=dtype) - B = tvmsp.placeholder((n, ), name='B', stype=stype, dtype=dtype) + B = tvmsp.placeholder((m, ), name='B', stype=stype, dtype=dtype) print(vars(A)) assert(A.stype == 'csr') assert(B.stype == 'csr') From 8b660e70e93638e2891b7d7d299ba5ebf9e2ebed Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 21 Jun 2018 16:22:27 +0800 Subject: [PATCH 04/36] [Sparse] added CSRTensor and a placeholder for sparse tensors; --- python/tvm/_ffi/ndarray.py | 18 ++ python/tvm/contrib/sparse.py | 236 ++++---------------------- tests/python/contrib/test_sparse.py | 20 ++- topi/python/topi/__init__.py | 1 + topi/python/topi/sparse/__init__.py | 6 + topi/python/topi/sparse/dense.py | 69 ++++++++ 
topi/python/topi/sparse/elemwise.py   |  21 +++
 topi/tests/python/test_topi_sparse.py |  63 +++++++
 8 files changed, 222 insertions(+), 212 deletions(-)
 create mode 100644 topi/python/topi/sparse/__init__.py
 create mode 100644 topi/python/topi/sparse/dense.py
 create mode 100644 topi/python/topi/sparse/elemwise.py
 create mode 100644 topi/tests/python/test_topi_sparse.py

diff --git a/python/tvm/_ffi/ndarray.py b/python/tvm/_ffi/ndarray.py
index 3788c07ac440..a879eb7cccaa 100644
--- a/python/tvm/_ffi/ndarray.py
+++ b/python/tvm/_ffi/ndarray.py
@@ -260,6 +260,24 @@ def copyto(self, target):
             raise ValueError("Unsupported target type %s" % str(type(target)))
         return target
 
+    def tostype(self, stype):
+        """Convert this array to the given storage type.
+
+        Returns
+        -------
+        arr : tvm.contrib.sparse.CSRTensor or tvm.ndarray.NDArray
+            The array in the requested storage type.
+        """
+        from ..contrib import sparse as tvmsp
+        if stype == 'csr':
+            return tvmsp.CSRTensor(shape=self.shape, dtype=self.dtype)
+        elif stype == 'dense':
+            return self
+        else:
+            raise RuntimeError("unknown stype: %s, valid options "
+                               "are `csr` and `dense`." % (stype,))
+        return None
+
 def free_extension_handle(handle, type_code):
     """Free c++ extension type handle
diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py
index 51d1e3c8571f..4c99f30a9fc9 100644
--- a/python/tvm/contrib/sparse.py
+++ b/python/tvm/contrib/sparse.py
@@ -6,168 +6,39 @@
 from .. import make as _make
 from .. import expr as _expr
 from .. import api as _api
 from .. import tag as _tag
-
-class TensorSlice(NodeGeneric, _expr.ExprOp):
-    """Auxiliary data structure for enabling slicing syntax on tensors."""
-    def __init__(self, tensor, indices):
-        if not isinstance(indices, tuple):
-            indices = (indices,)
-        self.tensor = tensor
-        self.indices = indices
-
-    def __getitem__(self, indices):
-        if not isinstance(indices, tuple):
-            indices = (indices,)
-        return TensorSlice(self.tensor, self.indices + indices)
-
-    def asnode(self):
-        """Convert slice to node."""
-        return self.tensor(*self.indices)
-
-    @property
-    def dtype(self):
-        """Data type of the tensor."""
-        return self.tensor.dtype
-
-# stand-in for the IterVar class; not bound in this module yet
-iter_var_cls = None
+from .. 
import tensor as _tensor @register_node -class CSRTensor(NodeBase, _expr.ExprOp): +class CSRTensor(object): """Tensor object, to construct, see function.Tensor""" - def __call__(self, *indices): - ndim = self.ndim - if len(indices) != ndim: - raise ValueError("Need to provide %d index in tensor slice" % ndim) - indices = convert_to_node(indices) - args = [] - for x in indices: - if isinstance(x, _expr.Expr): - args.append(x) - elif isinstance(x, iter_var_cls): - args.append(x.var) - else: - raise ValueError("The indices must be expression") - - return _make.Call(self.dtype, self.op.name, - args, _expr.Call.Halide, - self.op, self.value_index) - - def __getitem__(self, indices): - return TensorSlice(self, indices) - - def __hash__(self): - return _api_internal._TensorHash(self) - - def __eq__(self, other): - if not isinstance(other, Tensor): - if isinstance(other, _expr.ExprOp): - return _expr.EqualOp(self, other) - return False - if self.ndim == 0 and other.ndim == 0: - raise ValueError("Equal == comparison among rank-0 tensor is ambiguous, " - "use Tensor.equal for content expression equvalence, " - "use Tensor.same_as for exact reference comparison") - return _api_internal._TensorEqual(self, other) - - @property - def ndim(self): - """Dimension of the tensor.""" - return len(self.shape) - - @property - def axis(self): - """Axis of the tensor.""" - return self.__getattr__("axis") - - @property - def op(self): - """The corressponding :any:`Operation`.""" - return self.__getattr__("op") - - @property - def value_index(self): - """The output value index the tensor corressponds to.""" - return self.__getattr__("value_index") - - @property - def shape(self): - """The output shape of the tensor.""" - return self.__getattr__("shape") - - @property - def name(self): - op = self.op - if op.num_outputs == 1: - return op.name - return "%s.v%d" % (op.name, self.value_index) - - -class Operation(NodeBase): - """Represent an operation that generate a tensor""" - def output(self, index): - """Get the index-th output of the operation - - Parameters - ---------- - index : int - The index size. - - Returns - ------- - out : Tensor - The i-th output. 
- """ - return _api_internal._OpGetOutput(self, index) - - @property - def num_outputs(self): - """Number of outputs of this op.""" - return _api_internal._OpNumOutputs(self) - - @property - def input_tensors(self): - """List of input tensors to this op.""" - return _api_internal._OpInputTensors(self) - - -@register_node -class PlaceholderOp(Operation): - """Placeholder operation.""" - pass - - -@register_node -class ComputeOp(Operation): - """Compute operation.""" - @property - def axis(self): - """Represent axis of IterVar, only defined when it is a ComputeOp""" - return self.__getattr__("axis") - - @property - def reduce_axis(self): - """Represent axis of reductions, only defined when it is a ComputeOp""" - return self.__getattr__("reduce_axis") - - -@register_node -class ScanOp(Operation): - """Scan operation.""" - @property - def scan_axis(self): - """Represent axis of scan, only defined when it is a ScanOp""" - return self.__getattr__("scan_axis") - - -@register_node -class ExternOp(Operation): - """Extern operation.""" - pass + def __init__(self, shape, dtype='float32', name='', + data=None, indices=None, indptr=None): + self.stype = 'csr' + self.shape = shape + self.dtype = dtype + self.name = name + if data is None: + self.data = _api.placeholder(shape, dtype, name+'_data') + else: + self.data = data + if indices is None: + self.indices = _api.placeholder(shape, 'int32', name+'_indices') + else: + self.indices = indices + if indptr is None: + self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') + else: + self.indptr = indptr + assert isinstance(self.data, _tensor.Tensor) + assert isinstance(self.indices, _tensor.Tensor) + assert isinstance(self.indptr, _tensor.Tensor) float32 = "float32" csr = "csr" -class Placeholder(PlaceholderOp): +@register_node +class CSRPlaceholderOp(_tensor.Operation): """Placeholder class for csr based sparse tensor representation.""" def __init__(self, shape, dtype, name, stype): """Contructing a bare bone structure for a csr_matrix @@ -186,7 +57,7 @@ def __init__(self, shape, dtype, name, stype): stype: str, optional The storage type of the tensor """ - super(Placeholder, self).__init__(self) + super(CSRPlaceholderOp, self).__init__(self) self.shape = shape self.dtype = dtype self.name = name @@ -212,59 +83,10 @@ def placeholder(shape, dtype=None, name="placeholder", stype=None): Returns ------- - tensor: Tensor + tensor: CSRTensor The created tensor """ shape = (shape,) if isinstance(shape, _expr.Expr) else shape dtype = float32 if dtype is None else dtype stype = csr if stype is None else stype - return Placeholder(shape, dtype, name, stype) - -def compute(shape, fcompute, name="compute", tag=""): - """Construct a new tensor by computing over the shape domain. 
- - The compute rule is result[axis] = fcompute(axis) - - Parameters - ---------- - shape: Tuple of Expr - The shape of the tensor - - fcompute: lambda function of indices-> value - Specifies the input source expression - - name: str, optional - The name hint of the tensor - - Returns - ------- - tensor: Tensor - The created tensor - """ - if _tag.TagScope.current is not None: - if tag != "": - raise ValueError("nested tag is not allowed for now") - tag = _tag.TagScope.current.tag - shape = (shape,) if isinstance(shape, _expr.Expr) else shape - ndim = len(shape) - code = fcompute.__code__ - - if fcompute.__code__.co_argcount == 0: - arg_names = ["i%d" % i for i in range(ndim)] - else: - arg_names = code.co_varnames[:code.co_argcount] - - if ndim != len(arg_names): - raise ValueError("fcompute do not match dimension, ndim=%d" % ndim) - - dim_var = [_IterVar((0, s), x, 0) for x, s in zip(arg_names, shape)] - body = fcompute(*[v.var for v in dim_var]) - if not isinstance(body, (list, tuple)): - body = [body] - body = convert(body) - op_node = _api_internal._ComputeOp( - name, tag, dim_var, body) - num = op_node.num_outputs - outputs = tuple(op_node.output(i) for i in range(num)) - return outputs[0] if num == 1 else outputs - + return CSRPlaceholderOp(shape, dtype, name, stype) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 7db0be49d508..133b832ead28 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -4,18 +4,28 @@ import tvm import tvm.contrib.sparse as tvmsp +import numpy as np def test_tensor(): dtype = 'float32' stype = 'csr' + target = 'llvm' + ctx = tvm.context(target, 0) m = tvm.var('m') - A = tvmsp.placeholder((m, ), name='A', stype=stype, dtype=dtype) - B = tvmsp.placeholder((m, ), name='B', stype=stype, dtype=dtype) + A = tvmsp.CSRTensor(shape=(m, ), name='A', dtype=dtype) print(vars(A)) assert(A.stype == 'csr') - assert(B.stype == 'csr') - shape = [0] - assert(str(A.data.shape) == str(shape)) + C = tvm.compute(A.data.shape, lambda i: A.data[i] + 1., tag='cs_scatter') + print(C.shape) + s = tvm.create_schedule(C.op) + f = tvm.build(s, [A.data, C], target) + n = 5 + a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx) + c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx) + f(a, c) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1.) + print(a.asnumpy()) + print(c.asnumpy()) if __name__ == "__main__": test_tensor() diff --git a/topi/python/topi/__init__.py b/topi/python/topi/__init__.py index 349f805cc7f2..0c86f7b39792 100644 --- a/topi/python/topi/__init__.py +++ b/topi/python/topi/__init__.py @@ -32,6 +32,7 @@ from . import rocm from . import vision from . import image +from . 
import sparse
 # not import testing by default
 # because testing can have extra deps that are not necessary
 # we can import them from test cases explicitly
diff --git a/topi/python/topi/sparse/__init__.py b/topi/python/topi/sparse/__init__.py
new file mode 100644
index 000000000000..a751f6ca74d3
--- /dev/null
+++ b/topi/python/topi/sparse/__init__.py
@@ -0,0 +1,6 @@
+# pylint: disable=wildcard-import
+"""Sparse operators"""
+from __future__ import absolute_import as _abs
+
+from .dense import dense
+from .elemwise import *
diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py
new file mode 100644
index 000000000000..ccdf04981931
--- /dev/null
+++ b/topi/python/topi/sparse/dense.py
@@ -0,0 +1,69 @@
+"""TVM operator fully connected compute."""
+from __future__ import absolute_import
+import tvm
+from .. import tag
+
+def dense_default(data, weight, bias=None):
+    """The default implementation of dense in topi.
+
+    Parameters
+    ----------
+    data : tvm.contrib.sparse.CSRPlaceholderOp
+        2-D with shape [batch, in_dim] in CSR format
+
+    weight : tvm.Tensor
+        2-D with shape [out_dim, in_dim]
+
+    bias : tvm.Tensor, optional
+        1-D with shape [out_dim]
+
+    Returns
+    -------
+    output : tvm.Tensor
+        2-D with shape [batch, out_dim]
+    """
+    assert len(data.shape) == 2 and len(weight.shape) == 2, \
+        "only support 2-dim dense"
+    assert data.stype == 'csr', \
+        "data matrix is assumed to be sparse matrix, but data is `%s`" % (type(data),)
+    assert isinstance(weight, tvm.tensor.Tensor), \
+        "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight))
+    if bias is not None:
+        assert len(bias.shape) == 1
+    batch, in_dim = data.shape
+    out_dim, _ = weight.shape
+    k = tvm.reduce_axis((0, in_dim), name='k')
+    # matmul = tvm.compute((batch, out_dim), \
+    #                      lambda i, j: tvm.sum(data.data[i, k] * weight[j, k], axis=k), \
+    #                      tag='dense')
+    matmul = tvm.compute((batch, out_dim), \
+                         lambda i, j: tvm.sum(data.data[i] * weight[data.indptr[i], k], axis=k), \
+                         tag='spmm')
+    print(matmul.op.body)
+    if bias is not None:
+        matmul = tvm.compute((batch, out_dim), \
+                             lambda i, j: matmul[i, j] + bias[j], \
+                             tag=tag.BROADCAST)
+    return matmul
+
+
+def dense(data, weight, bias=None):
+    """Applies a linear transformation: :math:`Y = XW^T + b`.
+
+    Parameters
+    ----------
+    data : tvm.contrib.sparse.CSRPlaceholderOp
+        2-D with shape [batch, in_dim] in CSR format
+
+    weight : tvm.Tensor
+        2-D with shape [out_dim, in_dim]
+
+    bias : tvm.Tensor, optional
+        1-D with shape [out_dim]
+
+    Returns
+    -------
+    output : tvm.Tensor
+        2-D with shape [batch, out_dim]
+    """
+    return dense_default(data, weight, bias)
diff --git a/topi/python/topi/sparse/elemwise.py b/topi/python/topi/sparse/elemwise.py
new file mode 100644
index 000000000000..3e52c08b12c2
--- /dev/null
+++ b/topi/python/topi/sparse/elemwise.py
@@ -0,0 +1,21 @@
+"""Elementwise operators"""
+from __future__ import absolute_import as _abs
+import tvm
+from .. import tag
+from ..util import get_const_int
+
+@tvm.tag_scope(tag=tag.ELEMWISE)
+def relu(x):
+    """Take relu of input x.
+
+    Parameters
+    ----------
+    x : tvm.Tensor
+        Input argument.
+
+    Returns
+    -------
+    y : tvm.Tensor
+        The result.
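+
+    Note that when this is applied to the ``data`` array of a CSR tensor,
+    the sparsity pattern is preserved, since ``relu(0) == 0``.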
+ """ + return tvm.compute(x.shape, lambda *i: tvm.max(x(*i), tvm.const(0, x.dtype))) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py new file mode 100644 index 000000000000..6a9739bf7525 --- /dev/null +++ b/topi/tests/python/test_topi_sparse.py @@ -0,0 +1,63 @@ +"""Test code for dense operator""" +import os, sys +thisdir = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, os.path.join(thisdir, '../../../python')) +sys.path.insert(0, os.path.join(thisdir, '../../python')) + +import numpy as np +import tvm +import topi +import topi.testing +from topi.util import get_const_tuple +from tvm.contrib.pickle_memoize import memoize +import tvm.contrib.sparse as tvmsp + +def verify_dense(batch, in_dim, out_dim, use_bias=True): + A = tvmsp.placeholder((batch, in_dim), name='A') + B = tvm.placeholder((out_dim, in_dim), name='B') + C = tvm.placeholder((out_dim,), name='C') + D = topi.sparse.dense(A, B, C if use_bias else None) + dtype = A.dtype + + # use memoize to pickle the test data for next time use + @memoize("topi.tests.test_topi_dense") + def get_ref_data(): + a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5 + b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)-0.5 + c_np = np.random.uniform(size=(out_dim,)).astype(dtype) + if use_bias: + d_np = np.dot(a_np, b_np.T) + c_np + else: + d_np = np.dot(a_np, b_np.T) + return (a_np, b_np, c_np, d_np) + # get the test data + a_np, b_np, c_np, d_np = get_ref_data() + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + with tvm.target.create(device): + s = topi.generic.schedule_dense(D) + a = tvm.nd.array(a_np, ctx).tostype('csr') + print(type(a)) + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) + f = tvm.build(s, [A, B, C, D], device, name="dense") + f(a, b, c, d) + print(d.asnumpy()[0,:5]) + print(d_np[0,:5]) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3) + + for device in ['llvm']: + check_device(device) + +def test_dense(): + verify_dense(1, in_dim=1024, out_dim=1, use_bias=True) + verify_dense(1, in_dim=1024, out_dim=1, use_bias=False) + +if __name__ == "__main__": + test_dense() From c98dc77d2b71d0b4d4c1c58b1e144e7aa14c18a1 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 21 Jun 2018 18:54:15 +0800 Subject: [PATCH 05/36] trying to add buffers to be binded with sparse placeholders; --- python/tvm/_ffi/ndarray.py | 2 +- python/tvm/contrib/sparse.py | 48 ++++++++++++++++++++++++--- topi/tests/python/test_topi_sparse.py | 6 ++-- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/python/tvm/_ffi/ndarray.py b/python/tvm/_ffi/ndarray.py index a879eb7cccaa..1de2de0fd177 100644 --- a/python/tvm/_ffi/ndarray.py +++ b/python/tvm/_ffi/ndarray.py @@ -270,7 +270,7 @@ def tostype(self, stype): """ from ..contrib import sparse as tvmsp if stype == 'csr': - return tvmsp.CSRTensor(shape=self.shape, dtype=self.dtype) + return tvmsp.CSRTensor(self.numpy()) elif stype == 'dense': return self else: diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 4c99f30a9fc9..23c18850ef3c 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -8,9 +8,13 @@ from .. import api as _api from .. import tag as _tag from .. import tensor as _tensor +from .. 
import schedule as _schedule + +float32 = "float32" +csr = "csr" @register_node -class CSRTensor(object): +class CSRNDArray(object): """Tensor object, to construct, see function.Tensor""" def __init__(self, shape, dtype='float32', name='', data=None, indices=None, indptr=None): @@ -34,8 +38,12 @@ def __init__(self, shape, dtype='float32', name='', assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) -float32 = "float32" -csr = "csr" +def array(source_array, ctx=None, dtype=None): + ret = None + import numpy + if isinstance(source_array, numpy.ndarray): + return CSRNDArray(shape=source_array.shape, dtype=str(source_array.dtype)) + return ret @register_node class CSRPlaceholderOp(_tensor.Operation): @@ -67,6 +75,38 @@ def __init__(self, shape, dtype, name, stype): self.indices = _api.placeholder(shape, 'int32', name+'_indices') self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') +# +# @register_node +# class CSRBuffer(_schedule.Buffer): +# """Placeholder class for csr based sparse tensor representation.""" +# def __init__(self, shape, dtype, name, stype): +# """Contructing a bare bone structure for a csr_matrix +# +# Parameters +# ---------- +# shape: Tuple of Expr +# The shape of the tensor +# +# dtype: str, optional +# The data type of the tensor +# +# name: str, optional +# The name hint of the tensor +# +# stype: str, optional +# The storage type of the tensor +# """ +# super(CSRBuffer, self).__init__(self) +# self.shape = shape +# self.dtype = dtype +# self.name = name +# self.stype = stype +# shape = (0,) +# self.data = _api.decl_buffer(shape, dtype, name+'_data') +# self.indices = _api.decl_buffer(shape, 'int32', name+'_indices') +# self.indptr = _api.decl_buffer(shape, 'int32', name+'_indptr') +# + def placeholder(shape, dtype=None, name="placeholder", stype=None): """Construct an empty tensor object. 
@@ -83,7 +123,7 @@ def placeholder(shape, dtype=None, name="placeholder", stype=None): Returns ------- - tensor: CSRTensor + tensor: CSRNDArray The created tensor """ shape = (shape,) if isinstance(shape, _expr.Expr) else shape diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 6a9739bf7525..e73f33e108d5 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -41,12 +41,14 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): s = topi.generic.schedule_dense(D) - a = tvm.nd.array(a_np, ctx).tostype('csr') + a = tvmsp.array(a_np, ctx) print(type(a)) b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) - f = tvm.build(s, [A, B, C, D], device, name="dense") + Ab = tvm.decl_buffer(A.shape, A.dtype, name="A") + binds = {A: Ab, } + f = tvm.build(s, [A, B, C, D], device, name="dense", binds=binds) f(a, b, c, d) print(d.asnumpy()[0,:5]) print(d_np[0,:5]) From 033e44696c34c9462d0c6a7130d77b6daeb9c998 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 21 Jun 2018 19:09:53 +0800 Subject: [PATCH 06/36] avoid modifying original NDArray; --- python/tvm/_ffi/ndarray.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/python/tvm/_ffi/ndarray.py b/python/tvm/_ffi/ndarray.py index 1de2de0fd177..3788c07ac440 100644 --- a/python/tvm/_ffi/ndarray.py +++ b/python/tvm/_ffi/ndarray.py @@ -260,24 +260,6 @@ def copyto(self, target): raise ValueError("Unsupported target type %s" % str(type(target))) return target - def tostype(self, stype): - """Convert this array to numpy array - - Returns - ------- - np_arr : numpy.ndarray - The corresponding numpy array. - """ - from ..contrib import sparse as tvmsp - if stype == 'csr': - return tvmsp.CSRTensor(self.numpy()) - elif stype == 'dense': - return self - else: - raise RuntimeError("unknown stype: %s, valid options " - "are `csr` and `dense`." % (stype,)) - return None - def free_extension_handle(handle, type_code): """Free c++ extension type handle From 4952e639d79d1e9c83532f851a9a33845112c1a0 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 21 Jun 2018 19:18:21 +0800 Subject: [PATCH 07/36] enable sparse buffer; --- python/tvm/contrib/sparse.py | 70 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 23c18850ef3c..384576159d77 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -1,12 +1,9 @@ """Tensor and Operation class for computation declaration.""" # pylint: disable=invalid-name from __future__ import absolute_import as _abs -from .._ffi.node import NodeBase, NodeGeneric, register_node, convert_to_node -from .. import _api_internal -from .. import make as _make +from .._ffi.node import register_node from .. import expr as _expr from .. import api as _api -from .. import tag as _tag from .. import tensor as _tensor from .. 
import schedule as _schedule @@ -38,7 +35,8 @@ def __init__(self, shape, dtype='float32', name='', assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) -def array(source_array, ctx=None, dtype=None): +def array(source_array): + """Construct a CSRNDArray from numpy.ndarray""" ret = None import numpy if isinstance(source_array, numpy.ndarray): @@ -75,37 +73,37 @@ def __init__(self, shape, dtype, name, stype): self.indices = _api.placeholder(shape, 'int32', name+'_indices') self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') -# -# @register_node -# class CSRBuffer(_schedule.Buffer): -# """Placeholder class for csr based sparse tensor representation.""" -# def __init__(self, shape, dtype, name, stype): -# """Contructing a bare bone structure for a csr_matrix -# -# Parameters -# ---------- -# shape: Tuple of Expr -# The shape of the tensor -# -# dtype: str, optional -# The data type of the tensor -# -# name: str, optional -# The name hint of the tensor -# -# stype: str, optional -# The storage type of the tensor -# """ -# super(CSRBuffer, self).__init__(self) -# self.shape = shape -# self.dtype = dtype -# self.name = name -# self.stype = stype -# shape = (0,) -# self.data = _api.decl_buffer(shape, dtype, name+'_data') -# self.indices = _api.decl_buffer(shape, 'int32', name+'_indices') -# self.indptr = _api.decl_buffer(shape, 'int32', name+'_indptr') -# + +@register_node +class CSRBuffer(_schedule.Buffer): + """Placeholder class for csr based sparse tensor representation.""" + def __init__(self, shape, dtype, name, stype): + """Contructing a bare bone structure for a csr_matrix + + Parameters + ---------- + shape: Tuple of Expr + The shape of the tensor + + dtype: str, optional + The data type of the tensor + + name: str, optional + The name hint of the tensor + + stype: str, optional + The storage type of the tensor + """ + super(CSRBuffer, self).__init__(self) + self.shape = shape + self.dtype = dtype + self.name = name + self.stype = stype + shape = (0,) + self.data = _api.decl_buffer(shape, dtype, name+'_data') + self.indices = _api.decl_buffer(shape, 'int32', name+'_indices') + self.indptr = _api.decl_buffer(shape, 'int32', name+'_indptr') + def placeholder(shape, dtype=None, name="placeholder", stype=None): """Construct an empty tensor object. From 12ea0bb86614af3e60e28ce668ce2350e9bfd6dc Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 21 Jun 2018 22:47:59 +0800 Subject: [PATCH 08/36] bug fix and unpack sparse tensor; --- python/tvm/contrib/sparse.py | 3 ++- topi/python/topi/sparse/dense.py | 15 ++++++++------- topi/python/topi/sparse/elemwise.py | 1 - topi/tests/python/test_topi_sparse.py | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 384576159d77..7aebf2d19248 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -35,7 +35,8 @@ def __init__(self, shape, dtype='float32', name='', assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) -def array(source_array): +def array(source_array, ctx=None): + # pylint: disable=unused-argument """Construct a CSRNDArray from numpy.ndarray""" ret = None import numpy diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index ccdf04981931..9c16f6553afe 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -3,7 +3,8 @@ import tvm from .. 
import tag -def dense_default(data, weight, bias=None): +def dense_default(data, indices, indptr, weight, bias=None): + # pylint: disable=unused-argument """The default implementation of dense in topi. Parameters @@ -22,22 +23,22 @@ def dense_default(data, weight, bias=None): output : tvm.Tensor 2-D with shape [batch, out_dim] """ - assert len(data.shape) == 2 and len(weight.shape) == 2, \ + assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim dense" - assert data.stype == 'csr', \ - "data matrix is assumed to be sparse matrix, but data is `%s`" % (type(data),) + # assert data.stype == 'csr', \ + # "data matrix is assumed to be sparse matrix, but data is `%s`" % (type(data),) assert isinstance(weight, tvm.tensor.Tensor), \ "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 - batch, in_dim = data.shape + batch, in_dim = 1, data.shape[0] out_dim, _ = weight.shape k = tvm.reduce_axis((0, in_dim), name='k') # matmul = tvm.compute((batch, out_dim), \ # lambda i, j: tvm.sum(data.data[i, k] * weight[j, k], axis=k), \ # tag='dense') matmul = tvm.compute((batch, out_dim), \ - lambda i, j: tvm.sum(data.data[i] * weight[data.indptr[i], k], axis=k), \ + lambda i, j: tvm.sum(data[i] * weight[i, k], axis=k), \ tag='spmm') print(matmul.op.body) if bias is not None: @@ -66,4 +67,4 @@ def dense(data, weight, bias=None): output : tvm.Tensor 2-D with shape [batch, out_dim] """ - return dense_default(data, weight, bias) + return dense_default(data.data, data.indices, data.indptr, weight, bias) diff --git a/topi/python/topi/sparse/elemwise.py b/topi/python/topi/sparse/elemwise.py index 3e52c08b12c2..7d68fa990a97 100644 --- a/topi/python/topi/sparse/elemwise.py +++ b/topi/python/topi/sparse/elemwise.py @@ -2,7 +2,6 @@ from __future__ import absolute_import as _abs import tvm from .. import tag -from ..util import get_const_int @tvm.tag_scope(tag=tag.ELEMWISE) def relu(x): diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index e73f33e108d5..27a3cdbc9dd4 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -47,8 +47,8 @@ def check_device(device): c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) Ab = tvm.decl_buffer(A.shape, A.dtype, name="A") - binds = {A: Ab, } - f = tvm.build(s, [A, B, C, D], device, name="dense", binds=binds) + # binds = {A: Ab, } + f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") f(a, b, c, d) print(d.asnumpy()[0,:5]) print(d_np[0,:5]) From 52f8e48ca848c8bd7c4125515827d0b0874926a1 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 22 Jun 2018 13:06:28 +0800 Subject: [PATCH 09/36] first successful `cs_scatter`; --- python/tvm/contrib/sparse.py | 51 +++++++++++++++++++---------- tests/python/contrib/test_sparse.py | 28 ++++++++++++---- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 7aebf2d19248..129778a3ccea 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -1,47 +1,61 @@ """Tensor and Operation class for computation declaration.""" # pylint: disable=invalid-name from __future__ import absolute_import as _abs +import numpy as _np from .._ffi.node import register_node from .. import expr as _expr from .. import api as _api from .. import tensor as _tensor from .. import schedule as _schedule +from .. 
import ndarray as _nd float32 = "float32" csr = "csr" @register_node class CSRNDArray(object): - """Tensor object, to construct, see function.Tensor""" - def __init__(self, shape, dtype='float32', name='', - data=None, indices=None, indptr=None): + """Sparse tensor object in CSR format.""" + def __init__(self, source_array=None, + data=None, indices=None, indptr=None, ctx=None): + """Construct a sparse matrix in CSR format.""" self.stype = 'csr' - self.shape = shape - self.dtype = dtype - self.name = name + self.shape = source_array.shape + self.dtype = source_array.dtype if data is None: - self.data = _api.placeholder(shape, dtype, name+'_data') + ridx, cidx = _np.nonzero(source_array) + print(ridx, cidx) + data = source_array[ridx, cidx] + self.data = _nd.array(data, ctx) else: self.data = data if indices is None: - self.indices = _api.placeholder(shape, 'int32', name+'_indices') + indices = _np.nonzero(source_array)[1] + self.indices = _nd.array(indices, ctx) else: self.indices = indices if indptr is None: - self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') + indptr = [0]+_np.apply_along_axis(_np.count_nonzero, axis=1, arr=source_array).tolist() + indptr = _np.cumsum(_np.array(indptr, 'int32')) + self.indptr = _nd.array(indptr, ctx) else: self.indptr = indptr - assert isinstance(self.data, _tensor.Tensor) - assert isinstance(self.indices, _tensor.Tensor) - assert isinstance(self.indptr, _tensor.Tensor) + assert isinstance(self.data, _nd.NDArray) + assert isinstance(self.indices, _nd.NDArray) + assert isinstance(self.indptr, _nd.NDArray) + + def asnumpy(self): + """Construct a full matrix and convert it to numpy array.""" + full = _np.zeros(self.shape, self.dtype) + ridx = _np.diff(self.indptr.asnumpy()) + ridx = _np.hstack((_np.ones((v,), 'int32')*i for i, v in enumerate(ridx))) + full[ridx, self.indices.asnumpy().astype('int32')] = self.data.asnumpy() + return full def array(source_array, ctx=None): - # pylint: disable=unused-argument """Construct a CSRNDArray from numpy.ndarray""" ret = None - import numpy - if isinstance(source_array, numpy.ndarray): - return CSRNDArray(shape=source_array.shape, dtype=str(source_array.dtype)) + if isinstance(source_array, _np.ndarray): + return CSRNDArray(source_array=source_array, ctx=ctx) return ret @register_node @@ -69,10 +83,13 @@ def __init__(self, shape, dtype, name, stype): self.dtype = dtype self.name = name self.stype = stype - shape = (0,) + # shape = (0,) self.data = _api.placeholder(shape, dtype, name+'_data') self.indices = _api.placeholder(shape, 'int32', name+'_indices') self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') + assert isinstance(self.data, _tensor.Tensor) + assert isinstance(self.indices, _tensor.Tensor) + assert isinstance(self.indptr, _tensor.Tensor) @register_node diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 133b832ead28..5023d6e069b9 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -12,20 +12,34 @@ def test_tensor(): target = 'llvm' ctx = tvm.context(target, 0) m = tvm.var('m') - A = tvmsp.CSRTensor(shape=(m, ), name='A', dtype=dtype) + A = tvmsp.placeholder(shape=(m, ), name='A', dtype=dtype) print(vars(A)) assert(A.stype == 'csr') - C = tvm.compute(A.data.shape, lambda i: A.data[i] + 1., tag='cs_scatter') + C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') print(C.shape) s = tvm.create_schedule(C.op) f = tvm.build(s, [A.data, C], target) - n = 5 - a = 
tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx) - c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx) - f(a, c) - np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1.) + n = 3 + a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) + print(a) + a = tvmsp.array(a, ctx) + c = tvmsp.array(np.zeros((n,n), dtype), ctx) + c.data = tvm.nd.empty(a.data.shape, dtype) + c.indices = a.indices + c.indptr = a.indptr + print('==== a ====') + print(a.data) + print(a.indices) + print(a.indptr) + print('==== c ====') + print(c.data) + print(c.indices) + print(c.indptr) + f(a.data, c.data) + print('==== output ====') print(a.asnumpy()) print(c.asnumpy()) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) if __name__ == "__main__": test_tensor() From 13a40f5de4dfd5f51682c41cdbe6704614eb3bd7 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 22 Jun 2018 14:03:01 +0800 Subject: [PATCH 10/36] bug fix; --- python/tvm/contrib/sparse.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 129778a3ccea..603f6ec0cb73 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -59,7 +59,7 @@ def array(source_array, ctx=None): return ret @register_node -class CSRPlaceholderOp(_tensor.Operation): +class CSRPlaceholderOp(object): """Placeholder class for csr based sparse tensor representation.""" def __init__(self, shape, dtype, name, stype): """Contructing a bare bone structure for a csr_matrix @@ -78,7 +78,6 @@ def __init__(self, shape, dtype, name, stype): stype: str, optional The storage type of the tensor """ - super(CSRPlaceholderOp, self).__init__(self) self.shape = shape self.dtype = dtype self.name = name From 0e6bb1d3535426f77006d3898c20047f048ce2b6 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 22 Jun 2018 18:43:16 +0800 Subject: [PATCH 11/36] implemented topi.sparse.dense; --- topi/python/topi/sparse/dense.py | 36 +++++++++++++++++++-------- topi/tests/python/test_topi_sparse.py | 17 ++++++------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index 9c16f6553afe..0c90b5268cdd 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -23,23 +23,37 @@ def dense_default(data, indices, indptr, weight, bias=None): output : tvm.Tensor 2-D with shape [batch, out_dim] """ - assert len(data.shape) == 1 and len(weight.shape) == 2, \ + assert len(data.shape) == 2 and len(weight.shape) == 2, \ "only support 2-dim dense" - # assert data.stype == 'csr', \ - # "data matrix is assumed to be sparse matrix, but data is `%s`" % (type(data),) assert isinstance(weight, tvm.tensor.Tensor), \ "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 - batch, in_dim = 1, data.shape[0] + batch = 1 out_dim, _ = weight.shape - k = tvm.reduce_axis((0, in_dim), name='k') - # matmul = tvm.compute((batch, out_dim), \ - # lambda i, j: tvm.sum(data.data[i, k] * weight[j, k], axis=k), \ - # tag='dense') - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: tvm.sum(data[i] * weight[i, k], axis=k), \ - tag='spmm') + def dense_default_ir(data, indices, indptr, weight, out): + # pylint: disable=invalid-name + """Define IR for SpMM""" + ib = tvm.ir_builder.create() + data_ptr = ib.buffer_ptr(data) + indices_ptr = ib.buffer_ptr(indices) + indptr_ptr = ib.buffer_ptr(indptr) + weight_ptr = 
ib.buffer_ptr(weight)
+        out_ptr = ib.buffer_ptr(out)
+        num_rows = indptr.shape[0]-1
+        # one output element per row of the sparse matrix
+        with ib.for_range(0, num_rows, name='row') as row:
+            dot = ib.allocate('float32', (1,), name='dot', scope='local')
+            dot[0] = 0.
+            row_start = indptr_ptr[row]
+            row_end = indptr_ptr[row+1]
+            # accumulate the dot product over the non-zeros of this row
+            with ib.for_range(row_start, row_end, name='elem') as elem:
+                dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]]
+            out_ptr[row] += dot[0]
+        return ib.get()
+    oshape = (out_dim, 1)
+    matmul = tvm.extern(oshape, [data, indices, indptr, weight],
+                        lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+                        tag="dense", dtype='float32')
     print(matmul.op.body)
     if bias is not None:
         matmul = tvm.compute((batch, out_dim), \
                              lambda i, j: matmul[i, j] + bias[j], \
                              tag=tag.BROADCAST)
     return matmul
diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py
index 27a3cdbc9dd4..32d8b839f3d6 100644
--- a/topi/tests/python/test_topi_sparse.py
+++ b/topi/tests/python/test_topi_sparse.py
@@ -20,9 +20,9 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
     dtype = A.dtype
 
     # use memoize to pickle the test data for next time use
-    @memoize("topi.tests.test_topi_dense")
+    # @memoize("topi.tests.test_topi_dense")
     def get_ref_data():
-        a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5
+        a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.)
         b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)-0.5
         c_np = np.random.uniform(size=(out_dim,)).astype(dtype)
         if use_bias:
@@ -39,15 +39,15 @@ def check_device(device):
             print("Skip because %s is not enabled" % device)
             return
         print("Running on target: %s" % device)
-        with tvm.target.create(device):
-            s = topi.generic.schedule_dense(D)
+        s = tvm.create_schedule(D.op)
         a = tvmsp.array(a_np, ctx)
-        print(type(a))
+        print(a_np)
+        print(a.data)
+        print(a.indices)
+        print(a.indptr)
         b = tvm.nd.array(b_np, ctx)
         c = tvm.nd.array(c_np, ctx)
         d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
-        Ab = tvm.decl_buffer(A.shape, A.dtype, name="A")
-        # binds = {A: Ab, }
-        f = tvm.build(s, [A, B, C, D], device, name="dense", binds=binds)
+        f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense")
         f(a, b, c, d)
         print(d.asnumpy()[0,:5])
         print(d_np[0,:5])
         np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3)
@@ -58,8 +58,7 @@ def check_device(device):
     for device in ['llvm']:
         check_device(device)
 
 def test_dense():
-    verify_dense(1, in_dim=1024, out_dim=1, use_bias=True)
-    verify_dense(1, in_dim=1024, out_dim=1, use_bias=False)
+    verify_dense(3, in_dim=3, out_dim=1, use_bias=False)
 
 if __name__ == "__main__":
     test_dense()
From 95201969e75844c02dbbd28130ffd8297a88b1db Mon Sep 17 00:00:00 2001
From: Liangfu Chen
Date: Fri, 22 Jun 2018 19:41:42 +0800
Subject: [PATCH 12/36] bug fix;

---
 topi/tests/python/test_topi_sparse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py
index 32d8b839f3d6..29d692d3c095 100644
--- a/topi/tests/python/test_topi_sparse.py
+++ b/topi/tests/python/test_topi_sparse.py
@@ -49,7 +49,7 @@ def check_device(device):
         c = tvm.nd.array(c_np, ctx)
         d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
         f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense")
-        f(a, b, c, d)
+        f(a.data, a.indices, a.indptr, b, c, d)
         print(d.asnumpy()[0,:5])
         print(d_np[0,:5])
         np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3)
From f948622b7dbfe2277b44b9e781657bbde087233c Mon Sep 17 00:00:00 2001
From: Liangfu Chen
Date: Mon, 25 Jun 2018 12:59:30 +0800
Subject: [PATCH 13/36] first successful csrmv implementation;

---
 python/tvm/contrib/sparse.py          | 13 +--
 
topi/python/topi/sparse/__init__.py | 2 +- .../python/topi/sparse/{dense.py => csrmv.py} | 27 +++--- topi/tests/python/test_topi_sparse.py | 91 +++++++++++-------- 4 files changed, 74 insertions(+), 59 deletions(-) rename topi/python/topi/sparse/{dense.py => csrmv.py} (75%) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 603f6ec0cb73..f27a3023399f 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -29,19 +29,21 @@ def __init__(self, source_array=None, else: self.data = data if indices is None: - indices = _np.nonzero(source_array)[1] + indices = _np.nonzero(source_array)[1].astype('int32') self.indices = _nd.array(indices, ctx) else: self.indices = indices if indptr is None: indptr = [0]+_np.apply_along_axis(_np.count_nonzero, axis=1, arr=source_array).tolist() - indptr = _np.cumsum(_np.array(indptr, 'int32')) + indptr = _np.cumsum(_np.array(indptr, 'int32')).astype('int32') self.indptr = _nd.array(indptr, ctx) else: self.indptr = indptr assert isinstance(self.data, _nd.NDArray) assert isinstance(self.indices, _nd.NDArray) + assert str(self.indices.dtype) == 'int32', str(self.indices.dtype) assert isinstance(self.indptr, _nd.NDArray) + assert str(self.indptr.dtype) == 'int32', str(self.indptr.dtype) def asnumpy(self): """Construct a full matrix and convert it to numpy array.""" @@ -82,10 +84,9 @@ def __init__(self, shape, dtype, name, stype): self.dtype = dtype self.name = name self.stype = stype - # shape = (0,) - self.data = _api.placeholder(shape, dtype, name+'_data') - self.indices = _api.placeholder(shape, 'int32', name+'_indices') - self.indptr = _api.placeholder(shape, 'int32', name+'_indptr') + self.data = _api.placeholder((0,), dtype, name+'_data') + self.indices = _api.placeholder((0,), 'int32', name+'_indices') + self.indptr = _api.placeholder((self.shape[0]+1,), 'int32', name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) diff --git a/topi/python/topi/sparse/__init__.py b/topi/python/topi/sparse/__init__.py index a751f6ca74d3..6c25938b34a3 100644 --- a/topi/python/topi/sparse/__init__.py +++ b/topi/python/topi/sparse/__init__.py @@ -2,5 +2,5 @@ """Sparse operators""" from __future__ import absolute_import as _abs -from .dense import dense +from .csrmv import csrmv from .elemwise import * diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/csrmv.py similarity index 75% rename from topi/python/topi/sparse/dense.py rename to topi/python/topi/sparse/csrmv.py index 0c90b5268cdd..d70698068baf 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/csrmv.py @@ -3,9 +3,9 @@ import tvm from .. import tag -def dense_default(data, indices, indptr, weight, bias=None): +def csrmv_default(data, indices, indptr, weight, bias=None): # pylint: disable=unused-argument - """The default implementation of dense in topi. + """The default implementation of csrmv in topi. 
Parameters
     ----------
     data : tvm.Tensor
         1-D with shape [nonzeros], the non-zero values of the sparse matrix
 
     indices : tvm.Tensor
         1-D with shape [nonzeros], the column index of each non-zero value
 
     indptr : tvm.Tensor
         1-D with shape [batch + 1], the row pointers into data and indices
 
     weight : tvm.Tensor
         2-D with shape [in_dim, 1]
 
     bias : tvm.Tensor, optional
         1-D with shape [out_dim]
 
     Returns
     -------
     output : tvm.Tensor
         2-D with shape [batch, 1]
     """
-    assert len(data.shape) == 2 and len(weight.shape) == 2, \
-        "only support 2-dim dense"
+    assert len(data.shape) == 1 and len(weight.shape) == 2, \
+        "only support 1-D data with 2-D weight in csrmv"
     assert isinstance(weight, tvm.tensor.Tensor), \
         "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight))
     if bias is not None:
         assert len(bias.shape) == 1
-    batch = 1
+    batch = indptr.shape[0]-1
     out_dim, _ = weight.shape
-    def dense_default_ir(data, indices, indptr, weight, out):
+    def csrmv_default_ir(data, indices, indptr, weight, out):
         # pylint: disable=invalid-name
         """Define IR for SpMM"""
         ib = tvm.ir_builder.create()
         data_ptr = ib.buffer_ptr(data)
@@ -46,15 +46,16 @@ def dense_default_ir(data, indices, indptr, weight, out):
             dot[0] = 0.
             row_start = indptr_ptr[row]
             row_end = indptr_ptr[row+1]
-            with ib.for_range(row_start, row_end, name='elem') as elem:
+            row_elems = row_end-row_start
+            with ib.for_range(0, row_elems, name='elemidx') as elemidx:
+                elem = row_start+elemidx
                 dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]]
             out_ptr[row] += dot[0]
         return ib.get()
-    oshape = (out_dim, 1)
+    oshape = (batch, 1)
     matmul = tvm.extern(oshape, [data, indices, indptr, weight],
-                        lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
-                        tag="dense", dtype='float32')
+                        lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
+                        tag="csrmv", dtype='float32')
     if bias is not None:
         matmul = tvm.compute((batch, out_dim), \
                              lambda i, j: matmul[i, j] + bias[j], \
@@ -62,7 +63,7 @@ def dense_default_ir(data, indices, indptr, weight, out):
     return matmul
 
 
-def dense(data, weight, bias=None):
+def csrmv(data, weight, bias=None):
     """Multiply a sparse matrix by a dense vector: :math:`Y = AX + b`,
     with :math:`A` stored in CSR format.
 
     Parameters
     ----------
     data : tvm.contrib.sparse.CSRPlaceholderOp
         2-D with shape [batch, in_dim] in CSR format
 
     weight : tvm.Tensor
         2-D with shape [in_dim, 1]
 
     bias : tvm.Tensor, optional
         1-D with shape [out_dim]
 
     Returns
     -------
     output : tvm.Tensor
         2-D with shape [batch, 1]
     """
-    return dense_default(data.data, data.indices, data.indptr, weight, bias)
+    return csrmv_default(data.data, data.indices, data.indptr, weight, bias)
diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py
index 29d692d3c095..b76e93dda195 100644
--- a/topi/tests/python/test_topi_sparse.py
+++ b/topi/tests/python/test_topi_sparse.py
@@ -1,4 +1,4 @@
-"""Test code for dense operator"""
+"""Test code for sparse operator"""
 import os, sys
 thisdir = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, os.path.join(thisdir, '../../../python'))
@@ -9,56 +9,69 @@
 import topi
 import topi.testing
 from topi.util import get_const_tuple
-from tvm.contrib.pickle_memoize import memoize
 import tvm.contrib.sparse as tvmsp
+from collections import namedtuple
 
-def verify_dense(batch, in_dim, out_dim, use_bias=True):
+def verify_static_csrmv(batch, in_dim, out_dim, use_bias=True):
     A = tvmsp.placeholder((batch, in_dim), name='A')
-    B = tvm.placeholder((out_dim, in_dim), name='B')
-    C = tvm.placeholder((out_dim,), name='C')
-    D = topi.sparse.dense(A, B, C if use_bias else None)
+    B = tvm.placeholder((in_dim, 1), name='B')
+    C = tvm.placeholder((batch, 1), name='C')
+    D = topi.sparse.csrmv(A, B, C if use_bias else None)
     dtype = A.dtype
 
-    # use memoize to pickle the test data for next time use
-    # @memoize("topi.tests.test_topi_dense")
+    # get the test data
     def get_ref_data():
         a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.)
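+        # the shift-and-clamp above zeroes out roughly half of the entries,
+        # so a_np is a genuinely sparse input matrix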
- b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)-0.5 - c_np = np.random.uniform(size=(out_dim,)).astype(dtype) + b_np = np.random.uniform(size=(in_dim, 1)).astype(dtype)-0.5 + c_np = np.random.uniform(size=(batch, 1)).astype(dtype) if use_bias: - d_np = np.dot(a_np, b_np.T) + c_np + d_np = np.dot(a_np, b_np) + c_np else: - d_np = np.dot(a_np, b_np.T) + d_np = np.dot(a_np, b_np) return (a_np, b_np, c_np, d_np) - # get the test data a_np, b_np, c_np, d_np = get_ref_data() - def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - s = tvm.create_schedule(D.op) - a = tvmsp.array(a_np, ctx) - print(a_np) - print(a.data) - print(a.indices) - print(a.indptr) - b = tvm.nd.array(b_np, ctx) - c = tvm.nd.array(c_np, ctx) - d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) - f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") - f(a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy()[0,:5]) - print(d_np[0,:5]) - np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-3) - - for device in ['llvm']: - check_device(device) + device = 'llvm' + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + a = tvmsp.array(a_np, ctx) + print(a_np) + print(a.data) + print(a.indices) + print(a.indptr) + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) + A.data = tvm.placeholder(shape=a.data.shape, dtype='float32', name='A_data') + A.indices = tvm.placeholder(shape=a.indices.shape, dtype='int32', name='A_indices') + A.indptr = tvm.placeholder(shape=a.indptr.shape, dtype='int32', name='A_indptr') + D = topi.sparse.csrmv(A, B, C if use_bias else None) + s = tvm.create_schedule(D.op) + Ab = namedtuple('CSRBuffer', ['data','indices','indptr']) + print('A_data', A.data.shape, A.data.dtype) + print('A_indices', A.indices.shape, A.indices.dtype) + print('A_indptr', A.indptr.shape, A.indptr.dtype) + print('B:', B.shape, B.dtype) + print('C:', C.shape, C.dtype) + print('D:', D.shape, D.dtype) + print('b:', b.shape, b.dtype) + print('c:', c.shape, c.dtype) + print('d:', d.shape, d.dtype) + Ab.data = tvm.decl_buffer(A.data.shape, A.data.dtype, name='A_data') + Ab.indices = tvm.decl_buffer(A.indices.shape, A.indices.dtype, name='A_indices') + Ab.indptr = tvm.decl_buffer(A.indptr.shape, A.indptr.dtype, name='A_indptr') + binds = {A.data: Ab.data, A.indices: Ab.indices, A.indptr: Ab.indptr} + f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="csrmv", binds=binds) + f(a.data, a.indices, a.indptr, b, c, d) + print(d.asnumpy().T) + print(d_np.T) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) -def test_dense(): - verify_dense(3, in_dim=3, out_dim=1, use_bias=False) +def test_csrmv(): + verify_static_csrmv(batch=3, in_dim=5, out_dim=1, use_bias=False) if __name__ == "__main__": - test_dense() + test_csrmv() From 2b3a34a0ac28436a057374b5a03c3e9d3de4be62 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Mon, 25 Jun 2018 14:36:24 +0800 Subject: [PATCH 14/36] test sparse tensor; --- python/tvm/contrib/sparse.py | 1 - tests/python/contrib/test_sparse.py | 12 ++++++++---- topi/tests/python/test_topi_sparse.py | 13 ------------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py 
index f27a3023399f..528acd0c43fc 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -23,7 +23,6 @@ def __init__(self, source_array=None, self.dtype = source_array.dtype if data is None: ridx, cidx = _np.nonzero(source_array) - print(ridx, cidx) data = source_array[ridx, cidx] self.data = _nd.array(data, ctx) else: diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 5023d6e069b9..07b0193b7d10 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -15,14 +15,18 @@ def test_tensor(): A = tvmsp.placeholder(shape=(m, ), name='A', dtype=dtype) print(vars(A)) assert(A.stype == 'csr') + n = 3 + a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) + a = tvmsp.array(a, ctx) + print(a.data.shape) + A.data = tvm.placeholder(a.data.shape, dtype, name='A_data') + Ab = tvm.decl_buffer(a.data.shape, dtype, name='A_data') + binds = {A.data: Ab} C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') print(C.shape) s = tvm.create_schedule(C.op) - f = tvm.build(s, [A.data, C], target) - n = 3 - a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) + f = tvm.build(s, [A.data, C], target, binds=binds) print(a) - a = tvmsp.array(a, ctx) c = tvmsp.array(np.zeros((n,n), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) c.indices = a.indices diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index b76e93dda195..c11b32b82050 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -38,10 +38,6 @@ def get_ref_data(): return print("Running on target: %s" % device) a = tvmsp.array(a_np, ctx) - print(a_np) - print(a.data) - print(a.indices) - print(a.indptr) b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) @@ -51,15 +47,6 @@ def get_ref_data(): D = topi.sparse.csrmv(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) Ab = namedtuple('CSRBuffer', ['data','indices','indptr']) - print('A_data', A.data.shape, A.data.dtype) - print('A_indices', A.indices.shape, A.indices.dtype) - print('A_indptr', A.indptr.shape, A.indptr.dtype) - print('B:', B.shape, B.dtype) - print('C:', C.shape, C.dtype) - print('D:', D.shape, D.dtype) - print('b:', b.shape, b.dtype) - print('c:', c.shape, c.dtype) - print('d:', d.shape, d.dtype) Ab.data = tvm.decl_buffer(A.data.shape, A.data.dtype, name='A_data') Ab.indices = tvm.decl_buffer(A.indices.shape, A.indices.dtype, name='A_indices') Ab.indptr = tvm.decl_buffer(A.indptr.shape, A.indptr.dtype, name='A_indptr') From f6e5073f63c892c79d5cdf6bd7dab8e4afc49068 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 26 Jun 2018 12:28:20 +0800 Subject: [PATCH 15/36] enable dynamic memory allocation for sparse tensor placeholder; --- python/tvm/contrib/sparse.py | 4 +-- tests/python/contrib/test_sparse.py | 52 ++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 528acd0c43fc..e44c1e0b83ca 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -83,8 +83,8 @@ def __init__(self, shape, dtype, name, stype): self.dtype = dtype self.name = name self.stype = stype - self.data = _api.placeholder((0,), dtype, name+'_data') - self.indices = _api.placeholder((0,), 'int32', name+'_indices') + self.data = _api.placeholder((shape[1],), dtype, name+'_data') + 
self.indices = _api.placeholder((shape[1],), 'int32', name+'_indices') self.indptr = _api.placeholder((self.shape[0]+1,), 'int32', name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 07b0193b7d10..d84369e7c9f0 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -5,14 +5,16 @@ import tvm import tvm.contrib.sparse as tvmsp import numpy as np +from collections import namedtuple -def test_tensor(): +def test_static_tensor(): dtype = 'float32' stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) m = tvm.var('m') - A = tvmsp.placeholder(shape=(m, ), name='A', dtype=dtype) + n = tvm.var('n') + A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) print(vars(A)) assert(A.stype == 'csr') n = 3 @@ -31,20 +33,46 @@ def test_tensor(): c.data = tvm.nd.empty(a.data.shape, dtype) c.indices = a.indices c.indptr = a.indptr - print('==== a ====') - print(a.data) - print(a.indices) - print(a.indptr) - print('==== c ====') - print(c.data) - print(c.indices) - print(c.indptr) f(a.data, c.data) print('==== output ====') - print(a.asnumpy()) + print(a.asnumpy()*2.) + print(c.asnumpy()) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) + +def test_dynamic_tensor(): + dtype = 'float32' + stype = 'csr' + target = 'llvm' + ctx = tvm.context(target, 0) + m = tvm.var('m') + n = tvm.var('n') + nr, nc = 3, 5 + A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) + print(vars(A)) + assert(A.stype == 'csr') + C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + print(C.shape) + s = tvm.create_schedule(C.op) + a = np.maximum(np.random.uniform(size=(nr, nc)).astype(dtype)-.6, 0.) + a = tvmsp.array(a, ctx) + print(a.data.shape) + Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) + Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + binds = {A.data: Ab.data, A.indices: Ab.indices} + f = tvm.build(s, [m, n, A.data, C], target, binds=binds) + print(a) + c = tvmsp.array(np.zeros((nr, nc), dtype), ctx) + c.data = tvm.nd.empty(a.data.shape, dtype) + c.indices = a.indices + c.indptr = a.indptr + f(nr, a.data.shape[0], a.data, c.data) + print('==== output ====') + print(a.asnumpy()*2.) 
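The decl_buffer/binds pairing used in this test is the standard way to let a placeholder with a symbolic extent accept arrays whose length is only known at call time. A stripped-down sketch of the same pattern outside the sparse module, assuming the tvm.build(..., binds=...) API of this era:

import tvm
import numpy as np

n = tvm.var('n')                                  # extent resolved per call
X = tvm.placeholder((n,), 'float32', name='X')
Y = tvm.compute(X.shape, lambda i: X[i] * 2., name='Y')
s = tvm.create_schedule(Y.op)
Xb = tvm.decl_buffer(X.shape, X.dtype, name='Xb')
f = tvm.build(s, [X, Y], 'llvm', binds={X: Xb})   # bind X to the declared buffer

ctx = tvm.cpu(0)
x = tvm.nd.array(np.arange(5, dtype='float32'), ctx)
y = tvm.nd.empty((5,), 'float32', ctx)
f(x, y)                                           # n is taken from the concrete arrays
np.testing.assert_allclose(y.asnumpy(), x.asnumpy() * 2.)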
print(c.asnumpy()) np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) if __name__ == "__main__": - test_tensor() + test_static_tensor() + test_dynamic_tensor() From 10cb79e2b9ee931749d723acaa4468ba6e6a6f42 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 26 Jun 2018 12:55:53 +0800 Subject: [PATCH 16/36] enable dynamic memory allocation for csrmv; --- python/tvm/contrib/sparse.py | 13 +++---- topi/tests/python/test_topi_sparse.py | 49 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index e44c1e0b83ca..1053daf22fdb 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -62,7 +62,7 @@ def array(source_array, ctx=None): @register_node class CSRPlaceholderOp(object): """Placeholder class for csr based sparse tensor representation.""" - def __init__(self, shape, dtype, name, stype): + def __init__(self, shape, nonzeros, dtype, name, stype): """Contructing a bare bone structure for a csr_matrix Parameters @@ -83,9 +83,9 @@ def __init__(self, shape, dtype, name, stype): self.dtype = dtype self.name = name self.stype = stype - self.data = _api.placeholder((shape[1],), dtype, name+'_data') - self.indices = _api.placeholder((shape[1],), 'int32', name+'_indices') - self.indptr = _api.placeholder((self.shape[0]+1,), 'int32', name+'_indptr') + self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') + self.indices = _api.placeholder((nonzeros,), dtype='int32', name=self.name+'_indices') + self.indptr = _api.placeholder((self.shape[0]+1,), dtype='int32', name=self.name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) @@ -122,7 +122,7 @@ def __init__(self, shape, dtype, name, stype): self.indptr = _api.decl_buffer(shape, 'int32', name+'_indptr') -def placeholder(shape, dtype=None, name="placeholder", stype=None): +def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None): """Construct an empty tensor object. Parameters @@ -142,6 +142,7 @@ def placeholder(shape, dtype=None, name="placeholder", stype=None): The created tensor """ shape = (shape,) if isinstance(shape, _expr.Expr) else shape + nonzeros = 0 if nonzeros is None else nonzeros dtype = float32 if dtype is None else dtype stype = csr if stype is None else stype - return CSRPlaceholderOp(shape, dtype, name, stype) + return CSRPlaceholderOp(shape=shape, nonzeros=nonzeros, dtype=dtype, name=name, stype=stype) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index c11b32b82050..d4bd8de6d2ad 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -57,7 +57,56 @@ def get_ref_data(): print(d_np.T) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) +def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): + nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + dtype = 'float32' + A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') + B = tvm.placeholder((in_dim, 1), name='B') + C = tvm.placeholder((batch, 1), name='C') + D = topi.sparse.csrmv(A, B, C if use_bias else None) + s = tvm.create_schedule(D.op) + dtype = A.dtype + + # get the test data + def get_ref_data(): + a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.) 
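With both the matrix extents and the nonzero count symbolic, one compiled kernel can serve CSR inputs of any size and density; only the concrete array lengths differ between calls. A sketch of what the placeholder now exposes, assuming this series' tvm.contrib.sparse module is importable:

import tvm
import tvm.contrib.sparse as tvmsp

nr, nc, nnz = tvm.var('nr'), tvm.var('nc'), tvm.var('nnz')
A = tvmsp.placeholder(shape=(nr, nc), nonzeros=nnz, dtype='float32', name='A')
# data and indices are sized by the symbolic nonzero count,
# indptr by the symbolic row count plus one:
print(A.data.shape)     # (nnz,)
print(A.indices.shape)  # (nnz,)
print(A.indptr.shape)   # (nr + 1,)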
+ b_np = np.random.uniform(size=(in_dim, 1)).astype(dtype)-0.5 + c_np = np.random.uniform(size=(batch, 1)).astype(dtype) + if use_bias: + d_np = np.dot(a_np, b_np) + c_np + else: + d_np = np.dot(a_np, b_np) + return (a_np, b_np, c_np, d_np) + a_np, b_np, c_np, d_np = get_ref_data() + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + a = tvmsp.array(a_np, ctx) + _nr, _nc, _n = a.shape[0], a.shape[1], a.data.shape[0] + assert a.shape[0] == a.indptr.shape[0]-1 + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros((_nr, 1), dtype=dtype), ctx) + Ab = namedtuple('CSRBuffer', ['data','indices','indptr']) + Ab.data = tvm.decl_buffer(A.data.shape, A.data.dtype, name='A_data') + Ab.indices = tvm.decl_buffer(A.indices.shape, A.indices.dtype, name='A_indices') + Ab.indptr = tvm.decl_buffer(A.indptr.shape, A.indptr.dtype, name='A_indptr') + binds = {A.data: Ab.data, A.indices: Ab.indices, A.indptr: Ab.indptr} + f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv", binds=binds) + f(_nr, a.data, a.indices, a.indptr, b, c, d) + print(d.asnumpy().T) + print(d_np.T) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) + + for device in ["llvm"]: + check_device(device) + def test_csrmv(): + verify_dynamic_csrmv(batch=3, in_dim=5, out_dim=1, use_bias=False) verify_static_csrmv(batch=3, in_dim=5, out_dim=1, use_bias=False) if __name__ == "__main__": From 2368b89350262940632aacd2cf93a72da8b8f8ee Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 26 Jun 2018 13:03:20 +0800 Subject: [PATCH 17/36] bug fix; --- tests/python/contrib/test_sparse.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index d84369e7c9f0..489145c6ea1a 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -44,29 +44,28 @@ def test_dynamic_tensor(): stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) - m = tvm.var('m') - n = tvm.var('n') - nr, nc = 3, 5 - A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) + nr, nc, n = tvm.var('nr'), tvm.var('nc'), tvm.var('n') + A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) print(vars(A)) assert(A.stype == 'csr') C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') print(C.shape) s = tvm.create_schedule(C.op) - a = np.maximum(np.random.uniform(size=(nr, nc)).astype(dtype)-.6, 0.) + _nr, _nc = 3, 5 + a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) a = tvmsp.array(a, ctx) print(a.data.shape) Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} - f = tvm.build(s, [m, n, A.data, C], target, binds=binds) + f = tvm.build(s, [nr, A.data, C], target, binds=binds) print(a) - c = tvmsp.array(np.zeros((nr, nc), dtype), ctx) + c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) c.indices = a.indices c.indptr = a.indptr - f(nr, a.data.shape[0], a.data, c.data) + f(a.data.shape[0], a.data, c.data) print('==== output ====') print(a.asnumpy()*2.) 
print(c.asnumpy()) From 5f9c1394fc617195e5bc604002c72e1440f2a9b1 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 26 Jun 2018 16:30:23 +0800 Subject: [PATCH 18/36] improved code comment for documentation; --- python/tvm/contrib/sparse.py | 57 ++++++++++----------------- topi/python/topi/sparse/csrmv.py | 42 +++++++++++--------- topi/tests/python/test_topi_sparse.py | 7 +--- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 1053daf22fdb..4dc8e76c0d27 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -6,7 +6,6 @@ from .. import expr as _expr from .. import api as _api from .. import tensor as _tensor -from .. import schedule as _schedule from .. import ndarray as _nd float32 = "float32" @@ -17,7 +16,25 @@ class CSRNDArray(object): """Sparse tensor object in CSR format.""" def __init__(self, source_array=None, data=None, indices=None, indptr=None, ctx=None): - """Construct a sparse matrix in CSR format.""" + """Construct a sparse matrix in CSR format. + + Parameters + ---------- + source_array : numpy.ndarray + The corresponding numpy array. + + data : tvm.ndarray (optional) + The data array for constructing sparse matrix + + indices : tvm.ndarray (optional) + The indices array for constructing sparse matrix + + indptr : tvm.ndarray (optional) + The indptr array for constructing sparse matrix + + ctx: tvm.TVMContext + The corresponding context. + """ self.stype = 'csr' self.shape = source_array.shape self.dtype = source_array.dtype @@ -61,7 +78,7 @@ def array(source_array, ctx=None): @register_node class CSRPlaceholderOp(object): - """Placeholder class for csr based sparse tensor representation.""" + """Placeholder class for CSR based sparse tensor representation.""" def __init__(self, shape, nonzeros, dtype, name, stype): """Contructing a bare bone structure for a csr_matrix @@ -90,40 +107,8 @@ def __init__(self, shape, nonzeros, dtype, name, stype): assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) - -@register_node -class CSRBuffer(_schedule.Buffer): - """Placeholder class for csr based sparse tensor representation.""" - def __init__(self, shape, dtype, name, stype): - """Contructing a bare bone structure for a csr_matrix - - Parameters - ---------- - shape: Tuple of Expr - The shape of the tensor - - dtype: str, optional - The data type of the tensor - - name: str, optional - The name hint of the tensor - - stype: str, optional - The storage type of the tensor - """ - super(CSRBuffer, self).__init__(self) - self.shape = shape - self.dtype = dtype - self.name = name - self.stype = stype - shape = (0,) - self.data = _api.decl_buffer(shape, dtype, name+'_data') - self.indices = _api.decl_buffer(shape, 'int32', name+'_indices') - self.indptr = _api.decl_buffer(shape, 'int32', name+'_indptr') - - def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None): - """Construct an empty tensor object. + """Construct an empty sparse tensor object. Parameters ---------- diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index d70698068baf..df49e60e42e1 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -1,27 +1,32 @@ -"""TVM operator fully connected compute.""" +"""TVM operator compute SpMV in CSR format.""" from __future__ import absolute_import import tvm from .. 
import tag def csrmv_default(data, indices, indptr, weight, bias=None): - # pylint: disable=unused-argument """The default implementation of csrmv in topi. Parameters ---------- data : tvm.Tensor - 2-D with shape [batch, in_dim] + 1-D with shape [num_nonzeros] + + indices : tvm.Tensor + 1-D with shape [num_nonzeros] + + indptr : tvm.Tensor + 1-D with shape [num_rows+1] weight : tvm.Tensor - 2-D with shape [out_dim, in_dim] + 1-D with shape [num_cols] bias : tvm.Tensor, optional - 1-D with shape [out_dim] + 1-D with shape [num_rows] Returns ------- output : tvm.Tensor - 2-D with shape [batch, out_dim] + 1-D with shape [num_rows] """ assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim csrmv" @@ -32,26 +37,25 @@ def csrmv_default(data, indices, indptr, weight, bias=None): batch = indptr.shape[0]-1 out_dim, _ = weight.shape def csrmv_default_ir(data, indices, indptr, weight, out): - # pylint: disable=invalid-name - """Define IR for SpMM""" - ib = tvm.ir_builder.create() - data_ptr = ib.buffer_ptr(data) - indices_ptr = ib.buffer_ptr(indices) - indptr_ptr = ib.buffer_ptr(indptr) - weight_ptr = ib.buffer_ptr(weight) - out_ptr = ib.buffer_ptr(out) + """Define IR for SpMV""" + irb = tvm.ir_builder.create() + data_ptr = irb.buffer_ptr(data) + indices_ptr = irb.buffer_ptr(indices) + indptr_ptr = irb.buffer_ptr(indptr) + weight_ptr = irb.buffer_ptr(weight) + out_ptr = irb.buffer_ptr(out) num_rows = indptr.shape[0]-1 - with ib.for_range(0, num_rows, name='row') as row: - dot = ib.allocate('float32', (1,), name='dot', scope='local') + with irb.for_range(0, num_rows, name='row') as row: + dot = irb.allocate('float32', (1,), name='dot', scope='local') dot[0] = 0. row_start = indptr_ptr[row] row_end = indptr_ptr[row+1] row_elems = row_end-row_start - with ib.for_range(0, row_elems, name='elemidx') as elemidx: + with irb.for_range(0, row_elems, name='elemidx') as elemidx: elem = row_start+elemidx dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]] out_ptr[row] += dot[0] - return ib.get() + return irb.get() oshape = (batch, 1) matmul = tvm.extern(oshape, [data, indices, indptr, weight], lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), @@ -68,7 +72,7 @@ def csrmv(data, weight, bias=None): Parameters ---------- - data : tvm.Tensor + data : tvm.contrib.CSRTensor 2-D with shape [batch, in_dim] weight : tvm.Tensor diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index d4bd8de6d2ad..1ea519e2d371 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -91,12 +91,7 @@ def check_device(device): b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros((_nr, 1), dtype=dtype), ctx) - Ab = namedtuple('CSRBuffer', ['data','indices','indptr']) - Ab.data = tvm.decl_buffer(A.data.shape, A.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(A.indices.shape, A.indices.dtype, name='A_indices') - Ab.indptr = tvm.decl_buffer(A.indptr.shape, A.indptr.dtype, name='A_indptr') - binds = {A.data: Ab.data, A.indices: Ab.indices, A.indptr: Ab.indptr} - f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv", binds=binds) + f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv") f(_nr, a.data, a.indices, a.indptr, b, c, d) print(d.asnumpy().T) print(d_np.T) From 7afe978792d79f4ffae085f1d4df862e5c1d6c49 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 27 Jun 2018 13:38:16 +0800 Subject: [PATCH 19/36] improved 
reliability by initializing output ptr to zero; --- topi/python/topi/sparse/__init__.py | 1 - topi/python/topi/sparse/csrmv.py | 10 ++--- topi/python/topi/sparse/elemwise.py | 20 ---------- topi/tests/python/test_topi_sparse.py | 57 +++------------------------ 4 files changed, 11 insertions(+), 77 deletions(-) delete mode 100644 topi/python/topi/sparse/elemwise.py diff --git a/topi/python/topi/sparse/__init__.py b/topi/python/topi/sparse/__init__.py index 6c25938b34a3..5e86db6fb15c 100644 --- a/topi/python/topi/sparse/__init__.py +++ b/topi/python/topi/sparse/__init__.py @@ -3,4 +3,3 @@ from __future__ import absolute_import as _abs from .csrmv import csrmv -from .elemwise import * diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index df49e60e42e1..057611bb6ccf 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -18,7 +18,7 @@ def csrmv_default(data, indices, indptr, weight, bias=None): 1-D with shape [num_rows+1] weight : tvm.Tensor - 1-D with shape [num_cols] + 2-D with shape [num_cols, 1] bias : tvm.Tensor, optional 1-D with shape [num_rows] @@ -26,7 +26,7 @@ def csrmv_default(data, indices, indptr, weight, bias=None): Returns ------- output : tvm.Tensor - 1-D with shape [num_rows] + 2-D with shape [num_rows, 1] """ assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim csrmv" @@ -47,6 +47,7 @@ def csrmv_default_ir(data, indices, indptr, weight, out): num_rows = indptr.shape[0]-1 with irb.for_range(0, num_rows, name='row') as row: dot = irb.allocate('float32', (1,), name='dot', scope='local') + out_ptr[row] = 0. dot[0] = 0. row_start = indptr_ptr[row] row_end = indptr_ptr[row+1] @@ -59,10 +60,9 @@ def csrmv_default_ir(data, indices, indptr, weight, out): oshape = (batch, 1) matmul = tvm.extern(oshape, [data, indices, indptr, weight], lambda ins, outs: csrmv_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="csrmv", dtype='float32') + tag="csrmv", dtype='float32', name='csrmv') if bias is not None: - matmul = tvm.compute((batch, out_dim), \ - lambda i, j: matmul[i, j] + bias[j], \ + matmul = tvm.compute((batch, 1), lambda i, j: matmul[i, 0] + bias[i], \ tag=tag.BROADCAST) return matmul diff --git a/topi/python/topi/sparse/elemwise.py b/topi/python/topi/sparse/elemwise.py deleted file mode 100644 index 7d68fa990a97..000000000000 --- a/topi/python/topi/sparse/elemwise.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Elementwise operators""" -from __future__ import absolute_import as _abs -import tvm -from .. import tag - -@tvm.tag_scope(tag=tag.ELEMWISE) -def relu(x): - """Take relu of input x. - - Parameters - ---------- - x : tvm.Tensor - Input argument. - - Returns - ------- - y : tvm.Tensor - The result. - """ - return tvm.compute(x.shape, lambda *i: tvm.max(x(*i), tvm.const(0, x.dtype))) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 1ea519e2d371..963dcd2fc8fd 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -12,57 +12,12 @@ import tvm.contrib.sparse as tvmsp from collections import namedtuple -def verify_static_csrmv(batch, in_dim, out_dim, use_bias=True): - A = tvmsp.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((in_dim, 1), name='B') - C = tvm.placeholder((batch, 1), name='C') - D = topi.sparse.csrmv(A, B, C if use_bias else None) - dtype = A.dtype - - # get the test data - def get_ref_data(): - a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.) 
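Two behavioral points in this commit are easy to miss: the buffer produced by tvm.extern is uninitialized, so storing zero into out_ptr[row] before the += accumulation is required for correctness, and the bias is now added per output row. In plain NumPy the fixed kernel computes the following; this is a reference sketch, not patch code, and csrmv_ref is an illustrative name.

import numpy as np

def csrmv_ref(data, indices, indptr, weight, bias=None):
    """out[row, 0] = sum of data[e] * weight[indices[e], 0] over row's nonzeros."""
    num_rows = indptr.shape[0] - 1
    out = np.zeros((num_rows, 1), dtype=data.dtype)
    for row in range(num_rows):
        for elem in range(indptr[row], indptr[row + 1]):
            out[row, 0] += data[elem] * weight[indices[elem], 0]
    if bias is not None:
        out[:, 0] += bias        # one bias entry per row
    return out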
- b_np = np.random.uniform(size=(in_dim, 1)).astype(dtype)-0.5 - c_np = np.random.uniform(size=(batch, 1)).astype(dtype) - if use_bias: - d_np = np.dot(a_np, b_np) + c_np - else: - d_np = np.dot(a_np, b_np) - return (a_np, b_np, c_np, d_np) - a_np, b_np, c_np, d_np = get_ref_data() - - device = 'llvm' - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - a = tvmsp.array(a_np, ctx) - b = tvm.nd.array(b_np, ctx) - c = tvm.nd.array(c_np, ctx) - d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) - A.data = tvm.placeholder(shape=a.data.shape, dtype='float32', name='A_data') - A.indices = tvm.placeholder(shape=a.indices.shape, dtype='int32', name='A_indices') - A.indptr = tvm.placeholder(shape=a.indptr.shape, dtype='int32', name='A_indptr') - D = topi.sparse.csrmv(A, B, C if use_bias else None) - s = tvm.create_schedule(D.op) - Ab = namedtuple('CSRBuffer', ['data','indices','indptr']) - Ab.data = tvm.decl_buffer(A.data.shape, A.data.dtype, name='A_data') - Ab.indices = tvm.decl_buffer(A.indices.shape, A.indices.dtype, name='A_indices') - Ab.indptr = tvm.decl_buffer(A.indptr.shape, A.indptr.dtype, name='A_indptr') - binds = {A.data: Ab.data, A.indices: Ab.indices, A.indptr: Ab.indptr} - f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="csrmv", binds=binds) - f(a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy().T) - print(d_np.T) - np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) - def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") dtype = 'float32' A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') B = tvm.placeholder((in_dim, 1), name='B') - C = tvm.placeholder((batch, 1), name='C') + C = tvm.placeholder((nr,), name='C') D = topi.sparse.csrmv(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) dtype = A.dtype @@ -71,9 +26,9 @@ def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): def get_ref_data(): a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.) 
b_np = np.random.uniform(size=(in_dim, 1)).astype(dtype)-0.5 - c_np = np.random.uniform(size=(batch, 1)).astype(dtype) + c_np = np.random.uniform(size=(batch, )).astype(dtype) if use_bias: - d_np = np.dot(a_np, b_np) + c_np + d_np = np.dot(a_np, b_np) + c_np.reshape((batch, 1)) else: d_np = np.dot(a_np, b_np) return (a_np, b_np, c_np, d_np) @@ -95,14 +50,14 @@ def check_device(device): f(_nr, a.data, a.indices, a.indptr, b, c, d) print(d.asnumpy().T) print(d_np.T) - np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4) for device in ["llvm"]: check_device(device) def test_csrmv(): - verify_dynamic_csrmv(batch=3, in_dim=5, out_dim=1, use_bias=False) - verify_static_csrmv(batch=3, in_dim=5, out_dim=1, use_bias=False) + verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=False) + verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=True) if __name__ == "__main__": test_csrmv() From 1c75c6819c7895386f661c4cec4efa3be6536f6a Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 27 Jun 2018 18:04:50 +0800 Subject: [PATCH 20/36] implement csrmm with parallel for loop; --- topi/python/topi/sparse/__init__.py | 1 + topi/python/topi/sparse/csrmm.py | 92 +++++++++++++++++++++++++++ topi/python/topi/sparse/csrmv.py | 3 +- topi/tests/python/test_topi_sparse.py | 51 +++++++++++++++ 4 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 topi/python/topi/sparse/csrmm.py diff --git a/topi/python/topi/sparse/__init__.py b/topi/python/topi/sparse/__init__.py index 5e86db6fb15c..8c386b6229f1 100644 --- a/topi/python/topi/sparse/__init__.py +++ b/topi/python/topi/sparse/__init__.py @@ -3,3 +3,4 @@ from __future__ import absolute_import as _abs from .csrmv import csrmv +from .csrmm import csrmm diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py new file mode 100644 index 000000000000..01a1ff3b70c3 --- /dev/null +++ b/topi/python/topi/sparse/csrmm.py @@ -0,0 +1,92 @@ +"""TVM operator compute SpMV in CSR format.""" +from __future__ import absolute_import +import tvm +from .. import tag + +def csrmm_default(data, indices, indptr, weight, bias=None): + # pylint: disable=invalid-name + """The default implementation of csrmm in topi. 
+ + Parameters + ---------- + data : tvm.Tensor + 1-D with shape [num_nonzeros] + + indices : tvm.Tensor + 1-D with shape [num_nonzeros] + + indptr : tvm.Tensor + 1-D with shape [num_rows+1] + + weight : tvm.Tensor + 2-D with shape [num_cols, 1] + + bias : tvm.Tensor, optional + 1-D with shape [num_rows] + + Returns + ------- + output : tvm.Tensor + 2-D with shape [num_rows, 1] + """ + assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ + and len(weight.shape) == 2, "only support 2-dim csrmm" + assert isinstance(weight, tvm.tensor.Tensor), \ + "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + if bias is not None: + assert len(bias.shape) == 1 + M = indptr.shape[0]-1 + _, N = weight.shape + def csrmm_default_ir(data, indices, indptr, weight, out): + """Define IR for SpMV""" + irb = tvm.ir_builder.create() + data_ptr = irb.buffer_ptr(data) + indices_ptr = irb.buffer_ptr(indices) + indptr_ptr = irb.buffer_ptr(indptr) + weight_ptr = irb.buffer_ptr(weight) + out_ptr = irb.buffer_ptr(out) + M = indptr.shape[0]-1 + _, N = weight.shape + with irb.for_range(0, N, name='n') as n: + with irb.for_range(0, M, for_type="parallel", name='row') as row: + dot = irb.allocate('float32', (1,), name='dot', scope='local') + out_ptr[row*N+n] = 0. + dot[0] = 0. + row_start = indptr_ptr[row] + row_end = indptr_ptr[row+1] + row_elems = row_end-row_start + with irb.for_range(0, row_elems, name='idx') as idx: + elem = row_start+idx + dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]*N+n] + out_ptr[row*N+n] += dot[0] + return irb.get() + oshape = (M, N) + matmul = tvm.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: csrmm_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="csrmm", dtype='float32', name='out') + if bias is not None: + matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ + tag=tag.BROADCAST) + return matmul + + +def csrmm(data, weight, bias=None): + """Applies a linear transformation: :math:`Y = XW^T + b`. + + Parameters + ---------- + data : tvm.contrib.CSRTensor + 2-D with shape [batch, in_dim] + + weight : tvm.Tensor + 2-D with shape [out_dim, in_dim] + + bias : tvm.Tensor, optional + 1-D with shape [out_dim] + + Returns + ------- + output : tvm.Tensor + 2-D with shape [batch, out_dim] + """ + return csrmm_default(data.data, data.indices, data.indptr, weight, bias) diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index 057611bb6ccf..9feb96c4c410 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -35,7 +35,6 @@ def csrmv_default(data, indices, indptr, weight, bias=None): if bias is not None: assert len(bias.shape) == 1 batch = indptr.shape[0]-1 - out_dim, _ = weight.shape def csrmv_default_ir(data, indices, indptr, weight, out): """Define IR for SpMV""" irb = tvm.ir_builder.create() @@ -45,7 +44,7 @@ def csrmv_default_ir(data, indices, indptr, weight, out): weight_ptr = irb.buffer_ptr(weight) out_ptr = irb.buffer_ptr(out) num_rows = indptr.shape[0]-1 - with irb.for_range(0, num_rows, name='row') as row: + with irb.for_range(0, num_rows, for_type="parallel", name='row') as row: dot = irb.allocate('float32', (1,), name='dot', scope='local') out_ptr[row] = 0. dot[0] = 0. 
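The new csrmm kernel is the same row-walk with a dense right-hand matrix: weight_ptr[indices_ptr[elem]*N+n] is just the row-major flattening of weight[indices[elem], n]. Equivalently, in NumPy (a reference sketch with an illustrative name, not patch code):

import numpy as np

def csrmm_ref(data, indices, indptr, weight):
    """out[row, :] accumulates data[e] * weight[indices[e], :] over row's nonzeros."""
    M = indptr.shape[0] - 1
    _, N = weight.shape
    out = np.zeros((M, N), dtype=data.dtype)
    for row in range(M):
        for elem in range(indptr[row], indptr[row + 1]):
            out[row, :] += data[elem] * weight[indices[elem], :]
    return out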
diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 963dcd2fc8fd..488a8d853545 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -11,6 +11,7 @@ from topi.util import get_const_tuple import tvm.contrib.sparse as tvmsp from collections import namedtuple +import time def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True): nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") @@ -55,9 +56,59 @@ def check_device(device): for device in ["llvm"]: check_device(device) +def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True): + nr, nc, n = tvm.var("nr"), tvm.var("nc"), tvm.var("n") + dtype = 'float32' + A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, dtype=dtype, name='A') + B = tvm.placeholder((in_dim, out_dim), name='B') + C = tvm.placeholder((nr,), name='C') + D = topi.sparse.csrmm(A, B, C if use_bias else None) + s = tvm.create_schedule(D.op) + dtype = A.dtype + + # get the test data + def get_ref_data(): + a_np = np.maximum(np.random.uniform(size=(batch, in_dim)).astype(dtype)-0.5, 0.) + b_np = np.random.uniform(size=(in_dim, out_dim)).astype(dtype)-0.5 + c_np = np.random.uniform(size=(batch, )).astype(dtype) + if use_bias: + d_np = np.dot(a_np, b_np) + c_np.reshape((batch, 1)) + else: + d_np = np.dot(a_np, b_np) + return (a_np, b_np, c_np, d_np) + a_np, b_np, c_np, d_np = get_ref_data() + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + a = tvmsp.array(a_np, ctx) + _nr, _nc, _n = a.shape[0], a.shape[1], a.data.shape[0] + assert a.shape[0] == a.indptr.shape[0]-1 + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros((_nr, out_dim), dtype=dtype), ctx) + f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmm") + + f(_nr, a.data, a.indices, a.indptr, b, c, d) + print(d.asnumpy().T) + print(d_np.T) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-2) + + for device in ["llvm"]: + check_device(device) + def test_csrmv(): verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=False) verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=True) +def test_csrmm(): + M, K, N = 5, 7, 2 + verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=False) + verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) + if __name__ == "__main__": test_csrmv() + test_csrmm() From 5e67aaa70ec864f59f12f84d5466e11028025838 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 28 Jun 2018 11:30:44 +0800 Subject: [PATCH 21/36] enable tensorize to speedup computation; --- topi/python/topi/sparse/csrmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index 01a1ff3b70c3..466220313c13 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -47,7 +47,7 @@ def csrmm_default_ir(data, indices, indptr, weight, out): out_ptr = irb.buffer_ptr(out) M = indptr.shape[0]-1 _, N = weight.shape - with irb.for_range(0, N, name='n') as n: + with irb.for_range(0, N, for_type="vectorize", name='n') as n: with irb.for_range(0, M, for_type="parallel", name='row') as row: dot = irb.allocate('float32', (1,), name='dot', scope='local') out_ptr[row*N+n] = 0. 
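A small discrepancy worth noting: the commit message says "tensorize", but the change itself switches the column loop to for_type="vectorize". In tvm.ir_builder, the for_type argument requests the same loop kinds a schedule would via vectorize/parallel. A minimal sketch of the mechanism, under the same era's API; scale_ir and the names below are illustrative:

import tvm

def scale_ir(src, dst):
    irb = tvm.ir_builder.create()
    src_ptr = irb.buffer_ptr(src)
    dst_ptr = irb.buffer_ptr(dst)
    # "vectorize" here plays the role of s[op].vectorize(axis) in a schedule
    with irb.for_range(0, src.shape[0], for_type="vectorize", name='i') as i:
        dst_ptr[i] = src_ptr[i] * 2.
    return irb.get()

X = tvm.placeholder((16,), 'float32', name='X')
Y = tvm.extern(X.shape, [X], lambda ins, outs: scale_ir(ins[0], outs[0]),
               dtype='float32', name='Y')
s = tvm.create_schedule(Y.op)
f = tvm.build(s, [X, Y], 'llvm')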
From cbdce65ecfb0c1a6bddc71648234be069d179ba3 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 28 Jun 2018 18:32:08 +0800 Subject: [PATCH 22/36] trying implement sparse fully connected layer based on csr format; --- topi/python/topi/sparse/__init__.py | 1 + topi/python/topi/sparse/csrmm.py | 13 ++-- topi/python/topi/sparse/dense.py | 92 +++++++++++++++++++++++++++ topi/tests/python/test_topi_sparse.py | 43 +++++++++++++ 4 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 topi/python/topi/sparse/dense.py diff --git a/topi/python/topi/sparse/__init__.py b/topi/python/topi/sparse/__init__.py index 8c386b6229f1..bfac967d2f76 100644 --- a/topi/python/topi/sparse/__init__.py +++ b/topi/python/topi/sparse/__init__.py @@ -4,3 +4,4 @@ from .csrmv import csrmv from .csrmm import csrmm +from .dense import dense diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index 466220313c13..f8f854ebe008 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -1,7 +1,8 @@ -"""TVM operator compute SpMV in CSR format.""" +"""TVM operator compute SpMM in CSR format.""" from __future__ import absolute_import import tvm from .. import tag +from ..util import simplify def csrmm_default(data, indices, indptr, weight, bias=None): # pylint: disable=invalid-name @@ -16,18 +17,18 @@ def csrmm_default(data, indices, indptr, weight, bias=None): 1-D with shape [num_nonzeros] indptr : tvm.Tensor - 1-D with shape [num_rows+1] + 1-D with shape [M+1] weight : tvm.Tensor - 2-D with shape [num_cols, 1] + 2-D with shape [K, N] bias : tvm.Tensor, optional - 1-D with shape [num_rows] + 1-D with shape [M] Returns ------- output : tvm.Tensor - 2-D with shape [num_rows, 1] + 2-D with shape [M, N] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim csrmm" @@ -38,7 +39,7 @@ def csrmm_default(data, indices, indptr, weight, bias=None): M = indptr.shape[0]-1 _, N = weight.shape def csrmm_default_ir(data, indices, indptr, weight, out): - """Define IR for SpMV""" + """Define IR for SpMM""" irb = tvm.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py new file mode 100644 index 000000000000..6f7dde554243 --- /dev/null +++ b/topi/python/topi/sparse/dense.py @@ -0,0 +1,92 @@ +"""TVM operator compute SpMM in CSR format.""" +from __future__ import absolute_import +import tvm +from .. import tag + +def dense_default(data, indices, indptr, weight, bias=None): + # pylint: disable=invalid-name + """The default implementation of dense in topi. 
+ + Parameters + ---------- + data : tvm.Tensor + 1-D with shape [num_nonzeros] + + indices : tvm.Tensor + 1-D with shape [num_nonzeros] + + indptr : tvm.Tensor + 1-D with shape [M+1] + + weight : tvm.Tensor + 2-D with shape [K, N] + + bias : tvm.Tensor, optional + 1-D with shape [M] + + Returns + ------- + output : tvm.Tensor + 2-D with shape [M, N] + """ + assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ + and len(weight.shape) == 2, "only support 2-dim dense" + assert isinstance(weight, tvm.tensor.Tensor), \ + "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + if bias is not None: + assert len(bias.shape) == 1 + M = indptr.shape[0]-1 + _, N = weight.shape + def dense_default_ir(data, indices, indptr, weight, out): + """Define IR for SpMM""" + irb = tvm.ir_builder.create() + data_ptr = irb.buffer_ptr(data) + indices_ptr = irb.buffer_ptr(indices) + indptr_ptr = irb.buffer_ptr(indptr) + weight_ptr = irb.buffer_ptr(weight) + out_ptr = irb.buffer_ptr(out) + M = indptr.shape[0]-1 + _, N = weight.shape + with irb.for_range(0, N, for_type="vectorize", name='n') as n: + with irb.for_range(0, M, for_type="parallel", name='row') as row: + dot = irb.allocate('float32', (1,), name='dot', scope='local') + out_ptr[row*N+n] = 0. + dot[0] = 0. + row_start = indptr_ptr[row] + row_end = indptr_ptr[row+1] + row_elems = row_end-row_start + with irb.for_range(0, row_elems, name='idx') as idx: + elem = row_start+idx + dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]*N+n] + out_ptr[row*N+n] += dot[0] + return irb.get() + oshape = (M, N) + matmul = tvm.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype='float32', name='out') + if bias is not None: + matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ + tag=tag.BROADCAST) + return matmul + + +def dense(data, weight, bias=None): + """Applies a linear transformation: :math:`Y = XW^T + b`. 
+ + Parameters + ---------- + data : tvm.contrib.CSRTensor + 2-D with shape [batch, in_dim] + + weight : tvm.Tensor + 2-D with shape [out_dim, in_dim] + + bias : tvm.Tensor, optional + 1-D with shape [out_dim] + + Returns + ------- + output : tvm.Tensor + 2-D with shape [batch, out_dim] + """ + return dense_default(data.data, data.indices, data.indptr, weight, bias) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 488a8d853545..288035394c91 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -109,6 +109,49 @@ def test_csrmm(): verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=False) verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) + +def verify_dense(batch, in_dim, out_dim, use_bias=True): + A = tvmsp.placeholder((batch, in_dim), name='A') + B = tvm.placeholder((out_dim, in_dim), name='B') + C = tvm.placeholder((out_dim,), name='C') + D = topi.sparse.dense(A, B, C if use_bias else None) + s = tvm.create_schedule(D.op) + dtype = A.dtype + + # get the test data + def get_ref_data(): + a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype) + b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype) + c_np = np.random.uniform(size=(out_dim,)).astype(dtype) + if use_bias: + d_np = np.dot(a_np, b_np.T) + c_np + else: + d_np = np.dot(a_np, b_np.T) + return (a_np, b_np, c_np, d_np) + a_np, b_np, c_np, d_np = get_ref_data() + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + a = tvmsp.array(a_np, ctx) + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) + f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") + f(a.data, a.indices, a.indptr, b, c, d) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) + + check_device('llvm') + +def test_dense(): + M, K, N = 5, 7, 2 + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False) + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True) + if __name__ == "__main__": test_csrmv() test_csrmm() + test_dense() From 6926ca7be4b78a22e8ff233aa8290d61f92f6bdc Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 29 Jun 2018 13:23:45 +0800 Subject: [PATCH 23/36] first successful dense layer in csr format; --- topi/python/topi/sparse/dense.py | 36 +++++++++++++++------------ topi/tests/python/test_topi_sparse.py | 29 ++++++++++++++------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index 6f7dde554243..4cdeaa5bc0ed 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -1,7 +1,8 @@ -"""TVM operator compute SpMM in CSR format.""" +"""TVM operator compute Dense in CSR format.""" from __future__ import absolute_import import tvm from .. 
import tag +from ..util import simplify def dense_default(data, indices, indptr, weight, bias=None): # pylint: disable=invalid-name @@ -35,38 +36,41 @@ def dense_default(data, indices, indptr, weight, bias=None): "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 + dtype = data.dtype M = indptr.shape[0]-1 - _, N = weight.shape + N, K = weight.shape def dense_default_ir(data, indices, indptr, weight, out): - """Define IR for SpMM""" + """Define IR for Dense""" + dtype = data.dtype irb = tvm.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) indptr_ptr = irb.buffer_ptr(indptr) weight_ptr = irb.buffer_ptr(weight) out_ptr = irb.buffer_ptr(out) - M = indptr.shape[0]-1 - _, N = weight.shape + M = simplify(indptr.shape[0]-1) + N, K = weight.shape with irb.for_range(0, N, for_type="vectorize", name='n') as n: - with irb.for_range(0, M, for_type="parallel", name='row') as row: - dot = irb.allocate('float32', (1,), name='dot', scope='local') - out_ptr[row*N+n] = 0. + with irb.for_range(0, M, for_type="parallel", name='m') as m: + dot = irb.allocate(dtype, (1,), name='dot', scope='local') + out_ptr[m*N+n] = 0. dot[0] = 0. - row_start = indptr_ptr[row] - row_end = indptr_ptr[row+1] - row_elems = row_end-row_start - with irb.for_range(0, row_elems, name='idx') as idx: - elem = row_start+idx - dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]*N+n] - out_ptr[row*N+n] += dot[0] + row_start = indptr_ptr[m] + row_elems = indptr_ptr[m+1]-row_start + with irb.for_range(0, row_elems, name='k') as k: + elem = row_start+k + dot[0] += data_ptr[elem] * weight_ptr[indices_ptr[elem]+n*K] + out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) matmul = tvm.extern(oshape, [data, indices, indptr, weight], lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), - tag="dense", dtype='float32', name='out') + tag="dense", dtype=dtype, name='out') + print(matmul.op.body) if bias is not None: matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ tag=tag.BROADCAST) + print(matmul.op.body) return matmul diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 288035394c91..587696c4a8e3 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -111,18 +111,19 @@ def test_csrmm(): def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvmsp.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') + n = tvm.var('n') + dtype = 'float32' + A = tvmsp.placeholder((batch, in_dim), nonzeros=n, dtype=dtype, name='A') + B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') + C = tvm.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) - dtype = A.dtype # get the test data def get_ref_data(): - a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype) - b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype) - c_np = np.random.uniform(size=(out_dim,)).astype(dtype) + a_np = np.maximum(10*(np.random.uniform(size=(batch, in_dim)).astype('float32')-0.5), 0.).astype(dtype) + b_np = (10*np.random.uniform(size=(out_dim, in_dim)).astype('float32')).astype(dtype) + c_np = (10*np.random.uniform(size=(out_dim,)).astype('float32')).astype(dtype) if use_bias: d_np = np.dot(a_np, b_np.T) + c_np else: @@ -137,19 +138,29 @@ def 
check_device(device): return print("Running on target: %s" % device) a = tvmsp.array(a_np, ctx) + _nr, _nc, _n = a.shape[0], a.shape[1], a.data.shape[0] b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") + print(a_np) + print(a.data) + print(a.indices) + print(a.indptr) + print(b) + print(c) f(a.data, a.indices, a.indptr, b, c, d) + print('--') + print(d_np) + print(d.asnumpy()) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) check_device('llvm') def test_dense(): - M, K, N = 5, 7, 2 + M, K, N = 3, 5, 2 verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False) - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True) + # verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True) if __name__ == "__main__": test_csrmv() From 89c8835a1d087eeaa82f15e696a130e6fbd1e518 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 29 Jun 2018 16:19:04 +0800 Subject: [PATCH 24/36] support dense computation in csr format; --- topi/python/topi/sparse/csrmm.py | 4 ++-- topi/python/topi/sparse/dense.py | 12 +++++----- topi/tests/python/test_topi_sparse.py | 32 ++++++++++++--------------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index f8f854ebe008..c7f5184a8954 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -36,7 +36,7 @@ def csrmm_default(data, indices, indptr, weight, bias=None): "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) if bias is not None: assert len(bias.shape) == 1 - M = indptr.shape[0]-1 + M = simplify(indptr.shape[0]-1) _, N = weight.shape def csrmm_default_ir(data, indices, indptr, weight, out): """Define IR for SpMM""" @@ -46,7 +46,7 @@ def csrmm_default_ir(data, indices, indptr, weight, out): indptr_ptr = irb.buffer_ptr(indptr) weight_ptr = irb.buffer_ptr(weight) out_ptr = irb.buffer_ptr(out) - M = indptr.shape[0]-1 + M = simplify(indptr.shape[0]-1) _, N = weight.shape with irb.for_range(0, N, for_type="vectorize", name='n') as n: with irb.for_range(0, M, for_type="parallel", name='row') as row: diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index 4cdeaa5bc0ed..4cd43b8d900d 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -37,8 +37,8 @@ def dense_default(data, indices, indptr, weight, bias=None): if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype - M = indptr.shape[0]-1 - N, K = weight.shape + M = simplify(indptr.shape[0]-1) + N, _ = weight.shape def dense_default_ir(data, indices, indptr, weight, out): """Define IR for Dense""" dtype = data.dtype @@ -53,8 +53,8 @@ def dense_default_ir(data, indices, indptr, weight, out): with irb.for_range(0, N, for_type="vectorize", name='n') as n: with irb.for_range(0, M, for_type="parallel", name='m') as m: dot = irb.allocate(dtype, (1,), name='dot', scope='local') - out_ptr[m*N+n] = 0. - dot[0] = 0. 
+ out_ptr[m*N+n] = tvm.const(0, dtype) + dot[0] = tvm.const(0, dtype) row_start = indptr_ptr[m] row_elems = indptr_ptr[m+1]-row_start with irb.for_range(0, row_elems, name='k') as k: @@ -66,11 +66,9 @@ def dense_default_ir(data, indices, indptr, weight, out): matmul = tvm.extern(oshape, [data, indices, indptr, weight], lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), tag="dense", dtype=dtype, name='out') - print(matmul.op.body) if bias is not None: - matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[i], \ + matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ tag=tag.BROADCAST) - print(matmul.op.body) return matmul diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 587696c4a8e3..0e513d610074 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -110,10 +110,9 @@ def test_csrmm(): verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) -def verify_dense(batch, in_dim, out_dim, use_bias=True): - n = tvm.var('n') - dtype = 'float32' - A = tvmsp.placeholder((batch, in_dim), nonzeros=n, dtype=dtype, name='A') +def verify_dense(batch, in_dim, out_dim, use_bias=True, dtype='float32'): + nonzeros = tvm.var('nonzeros') + A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') C = tvm.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) @@ -121,9 +120,10 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True): # get the test data def get_ref_data(): - a_np = np.maximum(10*(np.random.uniform(size=(batch, in_dim)).astype('float32')-0.5), 0.).astype(dtype) - b_np = (10*np.random.uniform(size=(out_dim, in_dim)).astype('float32')).astype(dtype) - c_np = (10*np.random.uniform(size=(out_dim,)).astype('float32')).astype(dtype) + mag = 10. 
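For the dense operator, weight keeps the usual [out_dim, in_dim] layout, so weight_ptr[indices_ptr[elem]+n*K] is the flattened weight[n, indices[elem]], and after this commit the bias broadcasts over output columns rather than rows. A NumPy reference sketch (sparse_dense_ref is an illustrative name, not patch code):

import numpy as np

def sparse_dense_ref(data, indices, indptr, weight, bias=None):
    """out[m, n] = sum of data[e] * weight[n, indices[e]] over row m's nonzeros."""
    M = indptr.shape[0] - 1
    N, _ = weight.shape
    out = np.zeros((M, N), dtype=data.dtype)
    for m in range(M):
        for elem in range(indptr[m], indptr[m + 1]):
            out[m, :] += data[elem] * weight[:, indices[elem]]
    if bias is not None:
        out += bias              # one bias entry per output column
    return out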
+ a_np = np.maximum(mag*(np.random.uniform(size=(batch, in_dim)).astype('float32')-0.5), 0.).astype(dtype) + b_np = (mag*(np.random.uniform(size=(out_dim, in_dim)).astype('float32')-.5)).astype(dtype) + c_np = (mag*(np.random.uniform(size=(out_dim,)).astype('float32')-.5)).astype(dtype) if use_bias: d_np = np.dot(a_np, b_np.T) + c_np else: @@ -138,29 +138,25 @@ def check_device(device): return print("Running on target: %s" % device) a = tvmsp.array(a_np, ctx) - _nr, _nc, _n = a.shape[0], a.shape[1], a.data.shape[0] b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") - print(a_np) - print(a.data) - print(a.indices) - print(a.indptr) - print(b) - print(c) f(a.data, a.indices, a.indptr, b, c, d) - print('--') - print(d_np) print(d.asnumpy()) + print(d_np) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) check_device('llvm') def test_dense(): M, K, N = 3, 5, 2 - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False) - # verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True) + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='float32') + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='float32') + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int32') + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int32') + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int16') + verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int16') if __name__ == "__main__": test_csrmv() From 42eaaa369e2a07e06c7ceb4e7aa72281f4124f79 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 29 Jun 2018 16:56:03 +0800 Subject: [PATCH 25/36] put test functions at the bottom; --- topi/tests/python/test_topi_sparse.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 0e513d610074..ae547bf6e1db 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -100,16 +100,6 @@ def check_device(device): for device in ["llvm"]: check_device(device) -def test_csrmv(): - verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=False) - verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=True) - -def test_csrmm(): - M, K, N = 5, 7, 2 - verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=False) - verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) - - def verify_dense(batch, in_dim, out_dim, use_bias=True, dtype='float32'): nonzeros = tvm.var('nonzeros') A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') @@ -149,6 +139,15 @@ def check_device(device): check_device('llvm') +def test_csrmv(): + verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=False) + verify_dynamic_csrmv(batch=5, in_dim=7, out_dim=1, use_bias=True) + +def test_csrmm(): + M, K, N = 5, 7, 2 + verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=False) + verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) + def test_dense(): M, K, N = 3, 5, 2 verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='float32') From 67a1e1ff91124974a811f17988c5040e4e19326f Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 25 Jul 2018 18:12:32 +0800 Subject: [PATCH 26/36] convert to csr_matrix style input; --- python/tvm/contrib/sparse.py | 50 
+++++++++---------- tests/python/contrib/test_sparse.py | 55 ++++++++++++++++++++------- topi/tests/python/test_topi_sparse.py | 1 + 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 4dc8e76c0d27..75bbfd38b757 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -14,52 +14,43 @@ @register_node class CSRNDArray(object): """Sparse tensor object in CSR format.""" - def __init__(self, source_array=None, - data=None, indices=None, indptr=None, ctx=None): + def __init__(self, arg1, ctx=None, shape=None): """Construct a sparse matrix in CSR format. Parameters ---------- - source_array : numpy.ndarray - The corresponding numpy array. - - data : tvm.ndarray (optional) - The data array for constructing sparse matrix - - indices : tvm.ndarray (optional) - The indices array for constructing sparse matrix - - indptr : tvm.ndarray (optional) - The indptr array for constructing sparse matrix + arg1 : numpy.ndarray or a tuple with (data, indices, indptr) + The corresponding dense numpy array, + or a tuple for constructing a sparse matrix directly. ctx: tvm.TVMContext The corresponding context. """ - self.stype = 'csr' - self.shape = source_array.shape - self.dtype = source_array.dtype - if data is None: + if isinstance(arg1, tuple): + self.data, self.indices, self.indptr = arg1[0], arg1[1], arg1[2] + self.shape = shape + elif isinstance(arg1, _np.ndarray): + source_array = arg1 ridx, cidx = _np.nonzero(source_array) data = source_array[ridx, cidx] self.data = _nd.array(data, ctx) - else: - self.data = data - if indices is None: indices = _np.nonzero(source_array)[1].astype('int32') self.indices = _nd.array(indices, ctx) - else: - self.indices = indices - if indptr is None: indptr = [0]+_np.apply_along_axis(_np.count_nonzero, axis=1, arr=source_array).tolist() indptr = _np.cumsum(_np.array(indptr, 'int32')).astype('int32') self.indptr = _nd.array(indptr, ctx) + self.shape = source_array.shape else: - self.indptr = indptr + raise RuntimeError("Construct CSRNDArray with either a tuple (data, indices, indptr) " + "or a numpy.ndarray; can't handle type %s."
% (type(arg1),)) + self.stype = 'csr' + self.dtype = self.data.dtype + assert self.shape is not None assert isinstance(self.data, _nd.NDArray) assert isinstance(self.indices, _nd.NDArray) - assert str(self.indices.dtype) == 'int32', str(self.indices.dtype) + assert str(self.indices.dtype) == 'int32' or str(self.indices.dtype) == 'int64', str(self.indices.dtype) assert isinstance(self.indptr, _nd.NDArray) - assert str(self.indptr.dtype) == 'int32', str(self.indptr.dtype) + assert str(self.indptr.dtype) == 'int32' or str(self.indptr.dtype) == 'int64', str(self.indptr.dtype) def asnumpy(self): """Construct a full matrix and convert it to numpy array.""" @@ -69,12 +60,9 @@ def asnumpy(self): full[ridx, self.indices.asnumpy().astype('int32')] = self.data.asnumpy() return full -def array(source_array, ctx=None): +def array(source_array, ctx=None, shape=None): """Construct a CSRNDArray from numpy.ndarray""" - ret = None - if isinstance(source_array, _np.ndarray): - return CSRNDArray(source_array=source_array, ctx=ctx) - return ret + return CSRNDArray(source_array, shape=shape, ctx=ctx) @register_node class CSRPlaceholderOp(object): diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 489145c6ea1a..55545420ba72 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -4,6 +4,7 @@ import tvm import tvm.contrib.sparse as tvmsp +import tvm.ndarray as _nd import numpy as np from collections import namedtuple @@ -15,28 +16,21 @@ def test_static_tensor(): m = tvm.var('m') n = tvm.var('n') A = tvmsp.placeholder(shape=(m, n), name='A', dtype=dtype) - print(vars(A)) assert(A.stype == 'csr') n = 3 a = np.maximum(np.random.uniform(size=(n,n)).astype(dtype)-.6, 0.) a = tvmsp.array(a, ctx) - print(a.data.shape) A.data = tvm.placeholder(a.data.shape, dtype, name='A_data') Ab = tvm.decl_buffer(a.data.shape, dtype, name='A_data') binds = {A.data: Ab} C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - print(C.shape) s = tvm.create_schedule(C.op) f = tvm.build(s, [A.data, C], target, binds=binds) - print(a) c = tvmsp.array(np.zeros((n,n), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) c.indices = a.indices c.indptr = a.indptr f(a.data, c.data) - print('==== output ====') - print(a.asnumpy()*2.) - print(c.asnumpy()) np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) def test_dynamic_tensor(): @@ -46,32 +40,65 @@ def test_dynamic_tensor(): ctx = tvm.context(target, 0) nr, nc, n = tvm.var('nr'), tvm.var('nc'), tvm.var('n') A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) - print(vars(A)) assert(A.stype == 'csr') C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') - print(C.shape) s = tvm.create_schedule(C.op) _nr, _nc = 3, 5 a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) 
a = tvmsp.array(a, ctx) - print(a.data.shape) + assert a.data.dtype == a.dtype + Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) + Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') + Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') + binds = {A.data: Ab.data, A.indices: Ab.indices} + f = tvm.build(s, [nr, A.data, C], target, binds=binds) + c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) + c.data = tvm.nd.empty(a.data.shape, dtype) + c.indices = a.indices + c.indptr = a.indptr + f(a.data.shape[0], a.data, c.data) + np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) + +def test_sparse_array_tuple(): + dtype = 'float32' + stype = 'csr' + target = 'llvm' + ctx = tvm.context(target, 0) + nr, nc, n = tvm.var('nr'), tvm.var('nc'), tvm.var('n') + A = tvmsp.placeholder(shape=(nr, nc), nonzeros=n, name='A', dtype=dtype) + assert(A.stype == 'csr') + C = tvm.compute(A.data.shape, lambda i: A.data[i] * 2., tag='cs_scatter') + s = tvm.create_schedule(C.op) + _nr, _nc = 3, 5 + a = np.maximum(np.random.uniform(size=(_nr, _nc)).astype(dtype)-.6, 0.) + # convert to sparse array tuple + source_array = a + ridx, cidx = np.nonzero(source_array) + data = source_array[ridx, cidx] + a_data = _nd.array(data, ctx) + indices = np.nonzero(source_array)[1].astype('int32') + a_indices = _nd.array(indices, ctx) + indptr = [0]+np.apply_along_axis(np.count_nonzero, axis=1, arr=source_array).tolist() + indptr = np.cumsum(np.array(indptr, 'int32')).astype('int32') + a_indptr = _nd.array(indptr, ctx) + a_init = (a_data, a_indices, a_indptr) + # construct tvm sparse array with tuple + a = tvmsp.array(a_init, shape=source_array.shape, ctx=ctx) + assert a.data.dtype == a.dtype Ab = namedtuple('CSRBuffer', ['data', 'indices', 'indptr']) Ab.data = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_data') Ab.indices = tvm.decl_buffer(a.data.shape, a.data.dtype, name='A_indices') binds = {A.data: Ab.data, A.indices: Ab.indices} f = tvm.build(s, [nr, A.data, C], target, binds=binds) - print(a) c = tvmsp.array(np.zeros((_nr, _nc), dtype), ctx) c.data = tvm.nd.empty(a.data.shape, dtype) c.indices = a.indices c.indptr = a.indptr f(a.data.shape[0], a.data, c.data) - print('==== output ====') - print(a.asnumpy()*2.) 
- print(c.asnumpy()) np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) if __name__ == "__main__": test_static_tensor() test_dynamic_tensor() + test_sparse_array_tuple() diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index ae547bf6e1db..2480f8ac8b38 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -107,6 +107,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True, dtype='float32'): C = tvm.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) + print(tvm.lower(s, [A.data, A.indices, A.indptr, B, C], simple_mode=True)) # get the test data def get_ref_data(): From 59d2ed3574798ffa8d46ec5e4e59c3bdcd8b708e Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 25 Jul 2018 18:17:20 +0800 Subject: [PATCH 27/36] satisfy lint; --- python/tvm/contrib/sparse.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 75bbfd38b757..96c6730e9a03 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -48,9 +48,11 @@ def __init__(self, arg1, ctx=None, shape=None): assert self.shape is not None assert isinstance(self.data, _nd.NDArray) assert isinstance(self.indices, _nd.NDArray) - assert str(self.indices.dtype) == 'int32' or str(self.indices.dtype) == 'int64', str(self.indices.dtype) + assert str(self.indices.dtype) == 'int32' or \ + str(self.indices.dtype) == 'int64', str(self.indices.dtype) assert isinstance(self.indptr, _nd.NDArray) - assert str(self.indptr.dtype) == 'int32' or str(self.indptr.dtype) == 'int64', str(self.indptr.dtype) + assert str(self.indptr.dtype) == 'int32' or \ + str(self.indptr.dtype) == 'int64', str(self.indptr.dtype) def asnumpy(self): """Construct a full matrix and convert it to numpy array.""" From 27fd5cad628e59015d08a10aa3346260b1c2bb7b Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 26 Jul 2018 13:50:34 +0800 Subject: [PATCH 28/36] fix incorrect comment, and index type assignment problem; --- python/tvm/contrib/sparse.py | 15 ++++++----- tests/python/contrib/test_sparse.py | 6 ++--- topi/python/topi/sparse/csrmm.py | 35 +++++++++++++------------- topi/python/topi/sparse/csrmv.py | 36 ++++++++++++++------------- topi/python/topi/sparse/dense.py | 2 +- topi/tests/python/test_topi_sparse.py | 3 +++ 6 files changed, 53 insertions(+), 44 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 96c6730e9a03..6729c3698beb 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -10,6 +10,7 @@ float32 = "float32" csr = "csr" +itype = 'int32' @register_node class CSRNDArray(object): @@ -34,10 +35,10 @@ def __init__(self, arg1, ctx=None, shape=None): ridx, cidx = _np.nonzero(source_array) data = source_array[ridx, cidx] self.data = _nd.array(data, ctx) - indices = _np.nonzero(source_array)[1].astype('int32') + indices = _np.nonzero(source_array)[1].astype(itype) self.indices = _nd.array(indices, ctx) indptr = [0]+_np.apply_along_axis(_np.count_nonzero, axis=1, arr=source_array).tolist() - indptr = _np.cumsum(_np.array(indptr, 'int32')).astype('int32') + indptr = _np.cumsum(_np.array(indptr, itype)).astype(itype) self.indptr = _nd.array(indptr, ctx) self.shape = source_array.shape else: @@ -56,10 +57,11 @@ def __init__(self, arg1, ctx=None, shape=None): def asnumpy(self): """Construct a full matrix and convert it to numpy 
array.""" + # itype = 'int64' full = _np.zeros(self.shape, self.dtype) ridx = _np.diff(self.indptr.asnumpy()) - ridx = _np.hstack((_np.ones((v,), 'int32')*i for i, v in enumerate(ridx))) - full[ridx, self.indices.asnumpy().astype('int32')] = self.data.asnumpy() + ridx = _np.hstack((_np.ones((v,), itype)*i for i, v in enumerate(ridx))) + full[ridx, self.indices.asnumpy().astype(itype)] = self.data.asnumpy() return full def array(source_array, ctx=None, shape=None): @@ -86,13 +88,14 @@ def __init__(self, shape, nonzeros, dtype, name, stype): stype: str, optional The storage type of the tensor """ + # itype = 'int64' self.shape = shape self.dtype = dtype self.name = name self.stype = stype self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') - self.indices = _api.placeholder((nonzeros,), dtype='int32', name=self.name+'_indices') - self.indptr = _api.placeholder((self.shape[0]+1,), dtype='int32', name=self.name+'_indptr') + self.indices = _api.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') + self.indptr = _api.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') assert isinstance(self.data, _tensor.Tensor) assert isinstance(self.indices, _tensor.Tensor) assert isinstance(self.indptr, _tensor.Tensor) diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 55545420ba72..1d1ab3d549f7 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -60,7 +60,7 @@ def test_dynamic_tensor(): np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5) def test_sparse_array_tuple(): - dtype = 'float32' + dtype, itype = 'float32', 'int32' stype = 'csr' target = 'llvm' ctx = tvm.context(target, 0) @@ -76,10 +76,10 @@ def test_sparse_array_tuple(): ridx, cidx = np.nonzero(source_array) data = source_array[ridx, cidx] a_data = _nd.array(data, ctx) - indices = np.nonzero(source_array)[1].astype('int32') + indices = np.nonzero(source_array)[1].astype(itype) a_indices = _nd.array(indices, ctx) indptr = [0]+np.apply_along_axis(np.count_nonzero, axis=1, arr=source_array).tolist() - indptr = np.cumsum(np.array(indptr, 'int32')).astype('int32') + indptr = np.cumsum(np.array(indptr, itype)).astype(itype) a_indptr = _nd.array(indptr, ctx) a_init = (a_data, a_indices, a_indptr) # construct tvm sparse array with tuple diff --git a/topi/python/topi/sparse/csrmm.py b/topi/python/topi/sparse/csrmm.py index c7f5184a8954..f0574bf3df6d 100644 --- a/topi/python/topi/sparse/csrmm.py +++ b/topi/python/topi/sparse/csrmm.py @@ -11,24 +11,24 @@ def csrmm_default(data, indices, indptr, weight, bias=None): Parameters ---------- data : tvm.Tensor - 1-D with shape [num_nonzeros] + 1-D with shape [nonzeros] indices : tvm.Tensor - 1-D with shape [num_nonzeros] + 1-D with shape [nonzeros] indptr : tvm.Tensor - 1-D with shape [M+1] + 1-D with shape [m+1] weight : tvm.Tensor - 2-D with shape [K, N] + 2-D with shape [k, n] bias : tvm.Tensor, optional - 1-D with shape [M] + 1-D with shape [m] Returns ------- output : tvm.Tensor - 2-D with shape [M, N] + 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim csrmm" @@ -39,7 +39,7 @@ def csrmm_default(data, indices, indptr, weight, bias=None): M = simplify(indptr.shape[0]-1) _, N = weight.shape def csrmm_default_ir(data, indices, indptr, weight, out): - """Define IR for SpMM""" + """define ir for csrmm""" irb = tvm.ir_builder.create() data_ptr = 
irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) @@ -71,23 +71,24 @@ def csrmm_default_ir(data, indices, indptr, weight, out): return matmul -def csrmm(data, weight, bias=None): - """Applies a linear transformation: :math:`Y = XW^T + b`. +def csrmm(a, b, c=None): + """The `csrmm` routine performs a matrix-matrix operation defined as :math:`C := A*B + C`, + where `B` and `C` are dense matrices, `A` is an m-by-k sparse matrix in the CSR format. Parameters ---------- - data : tvm.contrib.CSRTensor - 2-D with shape [batch, in_dim] + a : tvm.contrib.sparse.CSRNDArray + 2-D sparse matrix with shape [m, k] - weight : tvm.Tensor - 2-D with shape [out_dim, in_dim] + b : tvm.Tensor + 2-D dense matrix with shape [k, n] - bias : tvm.Tensor, optional - 1-D with shape [out_dim] + c : tvm.Tensor, optional + 1-D dense vector with shape [n] Returns ------- output : tvm.Tensor - 2-D with shape [batch, out_dim] + 2-D with shape [m, n] """ - return csrmm_default(data.data, data.indices, data.indptr, weight, bias) + return csrmm_default(a.data, a.indices, a.indptr, b, c) diff --git a/topi/python/topi/sparse/csrmv.py b/topi/python/topi/sparse/csrmv.py index 9feb96c4c410..7cd101711cca 100644 --- a/topi/python/topi/sparse/csrmv.py +++ b/topi/python/topi/sparse/csrmv.py @@ -9,24 +9,24 @@ def csrmv_default(data, indices, indptr, weight, bias=None): Parameters ---------- data : tvm.Tensor - 1-D with shape [num_nonzeros] + 1-D with shape [nonzeros] indices : tvm.Tensor - 1-D with shape [num_nonzeros] + 1-D with shape [nonzeros] indptr : tvm.Tensor - 1-D with shape [num_rows+1] + 1-D with shape [m+1] weight : tvm.Tensor - 2-D with shape [num_cols, 1] + 2-D with shape [k, 1] bias : tvm.Tensor, optional - 1-D with shape [num_rows] + 1-D with shape [1] Returns ------- output : tvm.Tensor - 2-D with shape [num_rows, 1] + 2-D with shape [m, 1] """ assert len(data.shape) == 1 and len(weight.shape) == 2, \ "only support 2-dim csrmv" @@ -36,7 +36,7 @@ def csrmv_default(data, indices, indptr, weight, bias=None): assert len(bias.shape) == 1 batch = indptr.shape[0]-1 def csrmv_default_ir(data, indices, indptr, weight, out): - """Define IR for SpMV""" + """define ir for csrmv""" irb = tvm.ir_builder.create() data_ptr = irb.buffer_ptr(data) indices_ptr = irb.buffer_ptr(indices) @@ -66,23 +66,25 @@ def csrmv_default_ir(data, indices, indptr, weight, out): return matmul -def csrmv(data, weight, bias=None): - """Applies a linear transformation: :math:`Y = XW^T + b`. +def csrmv(a, x, y=None): + """The `csrmv` routine performs a matrix-vector operation defined as :math:`y := A*x + y`, + where `x` and `y` are vectors, `A` is an m-by-k sparse matrix in the CSR format. 
Parameters + ---------- - data : tvm.contrib.CSRTensor - 2-D with shape [batch, in_dim] + a : tvm.contrib.sparse.CSRNDArray + 2-D sparse matrix with shape [m, k] - weight : tvm.Tensor - 2-D with shape [out_dim, in_dim] + x : tvm.Tensor + 2-D dense matrix with shape [k, 1] - bias : tvm.Tensor, optional - 1-D with shape [out_dim] + y : tvm.Tensor, optional + 1-D dense vector with shape [1] Returns ------- output : tvm.Tensor - 2-D with shape [batch, out_dim] + 2-D dense matrix with shape [m, 1] """ - return csrmv_default(data.data, data.indices, data.indptr, weight, bias) + return csrmv_default(a.data, a.indices, a.indptr, x, y) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index 4cd43b8d900d..b0c347ae3119 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -77,7 +77,7 @@ def dense(data, weight, bias=None): Parameters ---------- - data : tvm.contrib.CSRTensor + data : tvm.contrib.sparse.CSRNDArray 2-D with shape [batch, in_dim] weight : tvm.Tensor diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 2480f8ac8b38..8ff211d118bd 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -47,6 +47,9 @@ def check_device(device): b = tvm.nd.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros((_nr, 1), dtype=dtype), ctx) + assert a.data.dtype == A.data.dtype + assert a.indices.dtype == A.indices.dtype + assert a.indptr.dtype == A.indptr.dtype f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv") f(_nr, a.data, a.indices, a.indptr, b, c, d) print(d.asnumpy().T) From df165b482630792091ec475be96c363b164444a3 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 27 Jul 2018 10:45:43 +0800 Subject: [PATCH 29/36] initial support for dense operator with sparse weights; --- topi/python/topi/sparse/dense.py | 99 ++++++++++++++++++++++++--- topi/tests/python/test_topi_sparse.py | 69 ++++++++++++++++--- 2 files changed, 150 insertions(+), 18 deletions(-) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index b0c347ae3119..ee20a6afa5bd 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -4,9 +4,9 @@ from .. import tag from ..util import simplify -def dense_default(data, indices, indptr, weight, bias=None): +def dense_si(data, indices, indptr, weight, bias=None): # pylint: disable=invalid-name - """The default implementation of dense in topi. + """The implementation of dense in topi, assuming sparse input. Parameters ---------- @@ -17,18 +17,18 @@ def dense_default(data, indices, indptr, weight, bias=None): 1-D with shape [num_nonzeros] indptr : tvm.Tensor - 1-D with shape [M+1] + 1-D with shape [m+1] weight : tvm.Tensor - 2-D with shape [K, N] + 2-D with shape [k, n] bias : tvm.Tensor, optional - 1-D with shape [M] + 1-D with shape [m] Returns ------- output : tvm.Tensor - 2-D with shape [M, N] + 2-D with shape [m, n] """ assert len(data.shape) == 1 and len(indices.shape) == 1 and len(indptr.shape) == 1 \ and len(weight.shape) == 2, "only support 2-dim dense" @@ -72,18 +72,87 @@ def dense_default_ir(data, indices, indptr, weight, out): return matmul +def dense_sw(data, w_data, w_indices, w_indptr, bias=None): + # pylint: disable=invalid-name + """The implementation of dense in topi, assuming sparse weight. 
+ + Parameters + ---------- + data : tvm.Tensor + 2-D with shape [m, k] + + w_data : tvm.Tensor + 1-D with shape [nonzeros] + + w_indices : tvm.Tensor + 1-D with shape [nonzeros] + + w_indptr : tvm.Tensor + 1-D with shape [n+1] + + bias : tvm.Tensor, optional + 1-D with shape [n] + + Returns + ------- + output : tvm.Tensor + 2-D with shape [m, n] + """ + assert len(w_data.shape) == 1 and len(w_indices.shape) == 1 and len(w_indptr.shape) == 1 \ + and len(data.shape) == 2, "only support 2-dim dense" + assert isinstance(weight, tvm.tensor.Tensor), \ + "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + if bias is not None: + assert len(bias.shape) == 1 + dtype = data.dtype + M, K = data.shape + N = simplify(w_indptr.shape[0]-1) + def dense_default_ir(data, indices, indptr, weight, out): + """Define IR for Dense""" + dtype = data.dtype + irb = tvm.ir_builder.create() + data_ptr = irb.buffer_ptr(data) + w_data_ptr = irb.buffer_ptr(w_data) + w_indices_ptr = irb.buffer_ptr(w_indices) + w_indptr_ptr = irb.buffer_ptr(w_indptr) + out_ptr = irb.buffer_ptr(out) + M, K = data.shape + N = simplify(w_indptr.shape[0]-1) + with irb.for_range(0, N, for_type="vectorize", name='n') as n: + with irb.for_range(0, M, for_type="parallel", name='m') as m: + dot = irb.allocate(dtype, (1,), name='dot', scope='local') + out_ptr[m*N+n] = tvm.const(0, dtype) + dot[0] = tvm.const(0, dtype) + row_start = w_indptr_ptr[m] + row_elems = w_indptr_ptr[m+1]-row_start + with irb.for_range(0, row_elems, name='k') as k: + elem = row_start+k + dot[0] += w_data_ptr[elem] * data_ptr[w_indices_ptr[elem]+n*K] + out_ptr[m*N+n] += dot[0] + return irb.get() + oshape = (M, N) + matmul = tvm.extern(oshape, [data, indices, indptr, weight], + lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), + tag="dense", dtype=dtype, name='out') + if bias is not None: + matmul = tvm.compute(oshape, lambda i, j: matmul[i, j] + bias[j], \ + tag=tag.BROADCAST) + return matmul + + def dense(data, weight, bias=None): """Applies a linear transformation: :math:`Y = XW^T + b`. + Either data or weight should be tvm.contrib.sparse.CSRNDArray. Parameters ---------- - data : tvm.contrib.sparse.CSRNDArray + data : tvm.contrib.sparse.CSRNDArray or tvm.tensor.Tensor 2-D with shape [batch, in_dim] - weight : tvm.Tensor + weight : tvm.tensor.Tensor or tvm.contrib.sparse.CSRNDArray 2-D with shape [out_dim, in_dim] - bias : tvm.Tensor, optional + bias : tvm.tensor.Tensor, optional 1-D with shape [out_dim] Returns @@ -91,4 +160,14 @@ def dense(data, weight, bias=None): output : tvm.Tensor 2-D with shape [batch, out_dim] """ - return dense_default(data.data, data.indices, data.indptr, weight, bias) + ret = None + if isinstance(data, tvm.contrib.sparse.CSRPlaceholderOp) and \ + isinstance(weight, tvm.tensor.Tensor): + ret = dense_si(data.data, data.indices, data.indptr, weight, bias) + elif isinstance(data, tvm.tensor.Tensor) and \ + isinstance(weight, tvm.contrib.sparse.CSRPlaceholderOp): + ret = dense_sw(data, weight.data, weight.indices, weight.indptr, bias) + else: + raise NotImplementedError("implementation for %s as data and %s as weights, " + "is not supported yet." 
% (type(data), type(weight), )) + return ret diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 8ff211d118bd..13ce9348b494 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -103,7 +103,47 @@ def check_device(device): for device in ["llvm"]: check_device(device) -def verify_dense(batch, in_dim, out_dim, use_bias=True, dtype='float32'): +def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'): + nonzeros = tvm.var('nonzeros') + A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') + B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') + C = tvm.placeholder((out_dim,), dtype=dtype, name='C') + D = topi.sparse.dense(A, B, C if use_bias else None) + s = tvm.create_schedule(D.op) + print(tvm.lower(s, [A.data, A.indices, A.indptr, B, C], simple_mode=True)) + + # get the test data + def get_ref_data(): + mag = 10. + a_np = np.maximum(mag*(np.random.uniform(size=(batch, in_dim)).astype('float32')-0.5), 0.).astype(dtype) + b_np = (mag*(np.random.uniform(size=(out_dim, in_dim)).astype('float32')-.5)).astype(dtype) + c_np = (mag*(np.random.uniform(size=(out_dim,)).astype('float32')-.5)).astype(dtype) + if use_bias: + d_np = np.dot(a_np, b_np.T) + c_np + else: + d_np = np.dot(a_np, b_np.T) + return (a_np, b_np, c_np, d_np) + a_np, b_np, c_np, d_np = get_ref_data() + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + a = tvmsp.array(a_np, ctx) + b = tvm.nd.array(b_np, ctx) + c = tvm.nd.array(c_np, ctx) + d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) + f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") + f(a.data, a.indices, a.indptr, b, c, d) + print(d.asnumpy()) + print(d_np) + np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) + + check_device('llvm') + +def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'): nonzeros = tvm.var('nonzeros') A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') @@ -152,14 +192,27 @@ def test_csrmm(): verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=False) verify_dynamic_csrmm(batch=M, in_dim=K, out_dim=N, use_bias=True) -def test_dense(): +def test_dense_si(): + M, K, N = 3, 5, 2 + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='float32') + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='float32') + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int32') + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int32') + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int16') + verify_dense_si(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int16') + +def test_dense_sw(): M, K, N = 3, 5, 2 - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='float32') - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='float32') - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int32') - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int32') - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int16') - verify_dense(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int16') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=False, 
dtype='float32') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='float32') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int32') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int32') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=False, dtype='int16') + verify_dense_sw(batch=M, in_dim=K, out_dim=N, use_bias=True, dtype='int16') + +def test_dense(): + test_dense_si() + test_dense_sw() if __name__ == "__main__": test_csrmv() From b1a1da52ae157171d59bf151324b3e209dc087cc Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 27 Jul 2018 13:21:35 +0800 Subject: [PATCH 30/36] bug fix in sparse-weight version of dense operator; --- topi/python/topi/sparse/dense.py | 20 ++++++++++---------- topi/tests/python/test_topi_sparse.py | 26 ++++++++------------------ 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index ee20a6afa5bd..63dab5fb0051 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -100,14 +100,14 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): """ assert len(w_data.shape) == 1 and len(w_indices.shape) == 1 and len(w_indptr.shape) == 1 \ and len(data.shape) == 2, "only support 2-dim dense" - assert isinstance(weight, tvm.tensor.Tensor), \ - "weight matrix is assumed to be tvm.Tensor, but weight is `%s`" % (type(weight)) + assert isinstance(data, tvm.tensor.Tensor), \ + "data matrix is assumed to be tvm.Tensor, but data is `%s`" % (type(data)) if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype M, K = data.shape N = simplify(w_indptr.shape[0]-1) - def dense_default_ir(data, indices, indptr, weight, out): + def dense_default_ir(data, w_data, w_indices, w_indptr, out): """Define IR for Dense""" dtype = data.dtype irb = tvm.ir_builder.create() @@ -118,20 +118,20 @@ def dense_default_ir(data, indices, indptr, weight, out): out_ptr = irb.buffer_ptr(out) M, K = data.shape N = simplify(w_indptr.shape[0]-1) - with irb.for_range(0, N, for_type="vectorize", name='n') as n: - with irb.for_range(0, M, for_type="parallel", name='m') as m: + with irb.for_range(0, M, for_type="vectorize", name='m') as m: + with irb.for_range(0, N, for_type="parallel", name='n') as n: dot = irb.allocate(dtype, (1,), name='dot', scope='local') out_ptr[m*N+n] = tvm.const(0, dtype) dot[0] = tvm.const(0, dtype) - row_start = w_indptr_ptr[m] - row_elems = w_indptr_ptr[m+1]-row_start + row_start = w_indptr_ptr[n] + row_elems = w_indptr_ptr[n+1]-row_start with irb.for_range(0, row_elems, name='k') as k: elem = row_start+k - dot[0] += w_data_ptr[elem] * data_ptr[w_indices_ptr[elem]+n*K] + dot[0] += w_data_ptr[elem] * data_ptr[w_indices_ptr[elem]+m*K] out_ptr[m*N+n] += dot[0] return irb.get() oshape = (M, N) - matmul = tvm.extern(oshape, [data, indices, indptr, weight], + matmul = tvm.extern(oshape, [data, w_data, w_indices, w_indptr], lambda ins, outs: dense_default_ir(ins[0], ins[1], ins[2], ins[3], outs[0]), tag="dense", dtype=dtype, name='out') if bias is not None: @@ -169,5 +169,5 @@ def dense(data, weight, bias=None): ret = dense_sw(data, weight.data, weight.indices, weight.indptr, bias) else: raise NotImplementedError("implementation for %s as data and %s as weights, " - "is not supported yet."
% (type(data), type(weight), )) return ret diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index 13ce9348b494..d7ddb93dd057 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -52,8 +52,6 @@ def check_device(device): assert a.indptr.dtype == A.indptr.dtype f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv") f(_nr, a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy().T) - print(d_np.T) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4) for device in ["llvm"]: @@ -96,8 +94,6 @@ def check_device(device): f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmm") f(_nr, a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy().T) - print(d_np.T) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-2) for device in ["llvm"]: @@ -110,7 +106,6 @@ def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'): C = tvm.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) - print(tvm.lower(s, [A.data, A.indices, A.indptr, B, C], simple_mode=True)) # get the test data def get_ref_data(): @@ -137,26 +132,23 @@ def check_device(device): d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") f(a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy()) - print(d_np) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) check_device('llvm') def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'): nonzeros = tvm.var('nonzeros') - A = tvmsp.placeholder(shape=(batch, in_dim), nonzeros=nonzeros, dtype=dtype, name='A') - B = tvm.placeholder((out_dim, in_dim), dtype=dtype, name='B') + A = tvm.placeholder((batch, in_dim), dtype=dtype, name='A') + B = tvmsp.placeholder(shape=(out_dim, in_dim), nonzeros=nonzeros, dtype=dtype, name='B') C = tvm.placeholder((out_dim,), dtype=dtype, name='C') D = topi.sparse.dense(A, B, C if use_bias else None) s = tvm.create_schedule(D.op) - print(tvm.lower(s, [A.data, A.indices, A.indptr, B, C], simple_mode=True)) # get the test data def get_ref_data(): mag = 10. 
- a_np = np.maximum(mag*(np.random.uniform(size=(batch, in_dim)).astype('float32')-0.5), 0.).astype(dtype) - b_np = (mag*(np.random.uniform(size=(out_dim, in_dim)).astype('float32')-.5)).astype(dtype) + a_np = (mag*(np.random.uniform(size=(batch, in_dim)).astype('float32')-.5)).astype(dtype) + b_np = np.maximum(mag*(np.random.uniform(size=(out_dim, in_dim)).astype('float32')-0.5), 0.).astype(dtype) c_np = (mag*(np.random.uniform(size=(out_dim,)).astype('float32')-.5)).astype(dtype) if use_bias: d_np = np.dot(a_np, b_np.T) + c_np @@ -171,14 +163,12 @@ def check_device(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) - a = tvmsp.array(a_np, ctx) - b = tvm.nd.array(b_np, ctx) + a = tvm.nd.array(a_np, ctx) + b = tvmsp.array(b_np, ctx) c = tvm.nd.array(c_np, ctx) d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) - f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense") - f(a.data, a.indices, a.indptr, b, c, d) - print(d.asnumpy()) - print(d_np) + f = tvm.build(s, [A, B.data, B.indices, B.indptr, C, D], device, name="dense") + f(a, b.data, b.indices, b.indptr, c, d) np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) check_device('llvm') From a62050d3d1b0704b4aa3776d1b1aa23fc031d28d Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 27 Jul 2018 13:38:57 +0800 Subject: [PATCH 31/36] satisfy the linter; --- topi/python/topi/sparse/dense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topi/python/topi/sparse/dense.py b/topi/python/topi/sparse/dense.py index 63dab5fb0051..01f323bc8ce9 100644 --- a/topi/python/topi/sparse/dense.py +++ b/topi/python/topi/sparse/dense.py @@ -105,7 +105,7 @@ def dense_sw(data, w_data, w_indices, w_indptr, bias=None): if bias is not None: assert len(bias.shape) == 1 dtype = data.dtype - M, K = data.shape + M, _ = data.shape N = simplify(w_indptr.shape[0]-1) def dense_default_ir(data, w_data, w_indices, w_indptr, out): """Define IR for Dense""" From 915d3bd5908d58fb2dd22d1e99faa7c9afeb3538 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Thu, 16 Aug 2018 19:37:02 +0800 Subject: [PATCH 32/36] update according to the comments; --- python/tvm/contrib/sparse.py | 57 +++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 6729c3698beb..0f0fcfcf978e 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -26,6 +26,9 @@ def __init__(self, arg1, ctx=None, shape=None): ctx: tvm.TVMContext The corresponding context. 
+ + shape : tuple of int + The shape of the array """ if isinstance(arg1, tuple): self.data, self.indices, self.indptr = arg1[0], arg1[1], arg1[2] @@ -57,22 +60,27 @@ def __init__(self, arg1, ctx=None, shape=None): def asnumpy(self): """Construct a full matrix and convert it to numpy array.""" - # itype = 'int64' full = _np.zeros(self.shape, self.dtype) ridx = _np.diff(self.indptr.asnumpy()) ridx = _np.hstack((_np.ones((v,), itype)*i for i, v in enumerate(ridx))) full[ridx, self.indices.asnumpy().astype(itype)] = self.data.asnumpy() return full -def array(source_array, ctx=None, shape=None): - """Construct a CSRNDArray from numpy.ndarray""" - return CSRNDArray(source_array, shape=shape, ctx=ctx) +def array(source_array, ctx=None, shape=None, stype='csr'): + """Construct a sparse NDArray from numpy.ndarray""" + ret = None + if stype == 'csr': + ret = CSRNDArray(source_array, shape=shape, ctx=ctx) + else: + raise NotImplementedError('stype=%s is not supported yet.' % (stype,)) + return ret @register_node -class CSRPlaceholderOp(object): - """Placeholder class for CSR based sparse tensor representation.""" - def __init__(self, shape, nonzeros, dtype, name, stype): - """Constructing a bare bone structure for a csr_matrix +class SparsePlaceholderOp(object): + """Placeholder class for sparse tensor representations.""" + def __init__(self, shape, nonzeros, dtype, name): + # pylint: disable=unused-argument + """Constructing a bare bone structure for a sparse matrix Parameters ---------- @@ -84,15 +92,31 @@ def __init__(self, shape, nonzeros, dtype, name): name: str, optional The name hint of the tensor - - stype: str, optional - The storage type of the tensor """ self.shape = shape self.dtype = dtype self.name = name - self.stype = stype + self.stype = 'unknown' + +@register_node +class CSRPlaceholderOp(SparsePlaceholderOp): + """Placeholder class for CSR based sparse tensor representation.""" + def __init__(self, shape, nonzeros, dtype, name): + """Constructing a bare bone structure for a csr_matrix + + Parameters + ---------- + shape: Tuple of Expr + The shape of the tensor + + dtype: str, optional + The data type of the tensor + + name: str, optional + The name hint of the tensor + """ + SparsePlaceholderOp.__init__(self, shape, nonzeros, dtype, name) + self.stype = 'csr' self.data = _api.placeholder((nonzeros,), dtype=dtype, name=self.name+'_data') self.indices = _api.placeholder((nonzeros,), dtype=itype, name=self.name+'_indices') self.indptr = _api.placeholder((self.shape[0]+1,), dtype=itype, name=self.name+'_indptr') @@ -123,4 +147,9 @@ def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None nonzeros = 0 if nonzeros is None else nonzeros dtype = float32 if dtype is None else dtype stype = csr if stype is None else stype - return CSRPlaceholderOp(shape=shape, nonzeros=nonzeros, dtype=dtype, name=name, stype=stype) + ret = None + if stype == 'csr': + ret = CSRPlaceholderOp(shape=shape, nonzeros=nonzeros, dtype=dtype, name=name) + else: + raise NotImplementedError('stype=%s is not supported yet.'
% (stype,)) + return ret From 743558b3ead4b998ffee978f50687c1131eed0e0 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 17 Aug 2018 19:46:21 +0800 Subject: [PATCH 33/36] Update sparse.py --- python/tvm/contrib/sparse.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 0f0fcfcf978e..2b125260de95 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -138,10 +138,13 @@ def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None name: str, optional The name hint of the tensor + stype: str, optional + The storage type of the sparse tensor (e.g. csr, coo, ell) + Returns ------- - tensor: CSRNDArray - The created tensor + tensor: SparsePlaceholderOp + The created sparse tensor placeholder """ shape = (shape,) if isinstance(shape, _expr.Expr) else shape nonzeros = 0 if nonzeros is None else nonzeros From 98207fbb96c8f2618cb88db1fa78e307c107e08f Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 21 Aug 2018 11:15:53 +0800 Subject: [PATCH 34/36] remove register_node declaration and path assignment in testing code; --- python/tvm/contrib/sparse.py | 3 --- tests/python/contrib/test_sparse.py | 4 ---- topi/tests/python/test_topi_sparse.py | 5 ----- 3 files changed, 12 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 2b125260de95..7fd17515fe2c 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -12,7 +12,6 @@ csr = "csr" itype = 'int32' -@register_node class CSRNDArray(object): """Sparse tensor object in CSR format.""" def __init__(self, arg1, ctx=None, shape=None): @@ -75,7 +74,6 @@ def array(source_array, ctx=None, shape=None, stype='csr'): raise NotImplementedError('stype=%s is not supported yet.'
% (stype,)) return ret -@register_node class SparsePlaceholderOp(object): """Placeholder class for sparse tensor representations.""" def __init__(self, shape, nonzeros, dtype, name): @@ -98,7 +96,6 @@ def __init__(self, shape, nonzeros, dtype, name): self.name = name self.stype = 'unknown' -@register_node class CSRPlaceholderOp(SparsePlaceholderOp): """Placeholder class for CSR based sparse tensor representation.""" def __init__(self, shape, nonzeros, dtype, name): diff --git a/tests/python/contrib/test_sparse.py b/tests/python/contrib/test_sparse.py index 1d1ab3d549f7..f7a0d1d137a5 100644 --- a/tests/python/contrib/test_sparse.py +++ b/tests/python/contrib/test_sparse.py @@ -1,7 +1,3 @@ -import os, sys -thisdir = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.join(thisdir, '../../../python')) - import tvm import tvm.contrib.sparse as tvmsp import tvm.ndarray as _nd diff --git a/topi/tests/python/test_topi_sparse.py b/topi/tests/python/test_topi_sparse.py index d7ddb93dd057..deb3a08ea01b 100644 --- a/topi/tests/python/test_topi_sparse.py +++ b/topi/tests/python/test_topi_sparse.py @@ -1,9 +1,4 @@ """Test code for sparse operator""" -import os, sys -thisdir = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.join(thisdir, '../../../python')) -sys.path.insert(0, os.path.join(thisdir, '../../python')) - import numpy as np import tvm import topi From 6becfb917e8a5413fd001db83e1ee16d90191288 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Tue, 21 Aug 2018 12:42:54 +0800 Subject: [PATCH 35/36] satisfy the linter; --- python/tvm/autotvm/task/dispatcher.py | 2 +- python/tvm/contrib/sparse.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/autotvm/task/dispatcher.py b/python/tvm/autotvm/task/dispatcher.py index 93f6d584abfa..bc059988b40b 100644 --- a/python/tvm/autotvm/task/dispatcher.py +++ b/python/tvm/autotvm/task/dispatcher.py @@ -16,8 +16,8 @@ import logging -from decorator import decorate import numpy as np +from decorator import decorate from tvm import target as _target diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 7fd17515fe2c..9bd2e4076301 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -2,7 +2,6 @@ # pylint: disable=invalid-name from __future__ import absolute_import as _abs import numpy as _np -from .._ffi.node import register_node from .. import expr as _expr from .. import api as _api from .. import tensor as _tensor From 727b32b2c95fa83d863949473f7685d9f9967957 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Fri, 24 Aug 2018 18:47:43 +0800 Subject: [PATCH 36/36] update according to the comments; --- python/tvm/contrib/sparse.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/python/tvm/contrib/sparse.py b/python/tvm/contrib/sparse.py index 0f0fcfcf978e..e0a7cdadc06a 100644 --- a/python/tvm/contrib/sparse.py +++ b/python/tvm/contrib/sparse.py @@ -9,7 +9,6 @@ from ..
import ndarray as _nd float32 = "float32" -csr = "csr" itype = 'int32' @register_node @@ -31,7 +30,8 @@ def __init__(self, arg1, ctx=None, shape=None): The shape of the array """ if isinstance(arg1, tuple): - self.data, self.indices, self.indptr = arg1[0], arg1[1], arg1[2] + assert len(arg1) == 3 + self.data, self.indices, self.indptr = arg1 self.shape = shape elif isinstance(arg1, _np.ndarray): source_array = arg1 @@ -87,6 +87,9 @@ def __init__(self, shape, nonzeros, dtype, name): shape: Tuple of Expr The shape of the tensor + nonzeros: int + The number of non-zero values + dtype: str, optional The data type of the tensor @@ -109,6 +112,9 @@ def __init__(self, shape, nonzeros, dtype, name): shape: Tuple of Expr The shape of the tensor + nonzeros: int + The number of non-zero values + dtype: str, optional The data type of the tensor @@ -132,6 +138,9 @@ def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None shape: Tuple of Expr The shape of the tensor + nonzeros: int + The number of non-zero values + dtype: str, optional The data type of the tensor @@ -146,7 +155,7 @@ def placeholder(shape, nonzeros=None, dtype=None, name="placeholder", stype=None shape = (shape,) if isinstance(shape, _expr.Expr) else shape nonzeros = 0 if nonzeros is None else nonzeros dtype = float32 if dtype is None else dtype - stype = csr if stype is None else stype + stype = 'csr' if stype is None else stype ret = None if stype == 'csr': ret = CSRPlaceholderOp(shape=shape, nonzeros=nonzeros, dtype=dtype, name=name)
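For orientation, here is a minimal sketch of the two CSRNDArray construction paths this series converges on, assuming the 0.4-era TVM API used throughout these patches; the 3x5 shape and the 'llvm' target are illustrative only, not taken from the patches:

    import numpy as np
    import tvm
    import tvm.contrib.sparse as tvmsp

    ctx = tvm.context('llvm', 0)
    dense = np.maximum(np.random.uniform(size=(3, 5)).astype('float32') - .6, 0.)
    # Path 1: from a dense numpy array; data/indices/indptr are derived internally.
    a = tvmsp.array(dense, ctx)
    # Path 2: from a (data, indices, indptr) tuple; shape must be passed explicitly.
    b = tvmsp.array((a.data, a.indices, a.indptr), shape=dense.shape, ctx=ctx)
    # Both round-trip back to the dense form.
    np.testing.assert_allclose(a.asnumpy(), dense, rtol=1e-5)
    np.testing.assert_allclose(b.asnumpy(), dense, rtol=1e-5)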
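The IR emitted by csrmm_default and dense_si is the textbook CSR row traversal: each output row m accumulates data[k] * weight[indices[k], :] for k in [indptr[m], indptr[m+1]). As a pure-numpy cross-check of that loop nest (a sketch independent of TVM; csr_matmul_ref is a hypothetical helper name, not part of the patches):

    import numpy as np

    def csr_matmul_ref(data, indices, indptr, weight):
        # Mirrors the IR: row_start = indptr[m], row_elems = indptr[m+1] - row_start,
        # then a dot-product accumulation over the row's stored elements.
        m = indptr.shape[0] - 1
        out = np.zeros((m, weight.shape[1]), dtype=data.dtype)
        for row in range(m):
            for elem in range(indptr[row], indptr[row + 1]):
                out[row] += data[elem] * weight[indices[elem]]
        return out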
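Likewise for the sparse-weight variant after the PATCH 30 fix: dense_sw computes Y = X * W^T with W stored in CSR, so each output column n walks one CSR row of W and gathers from the dense input. A numpy model of that access pattern (again a sketch; dense_sw_ref is a hypothetical name):

    import numpy as np

    def dense_sw_ref(x, w_data, w_indices, w_indptr, bias=None):
        # One CSR row of W per output column n; gather x[m, w_indices[...]],
        # matching dot[0] += w_data_ptr[elem] * data_ptr[w_indices_ptr[elem]+m*K].
        m, n = x.shape[0], w_indptr.shape[0] - 1
        out = np.zeros((m, n), dtype=x.dtype)
        for row in range(m):
            for col in range(n):
                s, e = w_indptr[col], w_indptr[col + 1]
                out[row, col] = np.dot(w_data[s:e], x[row, w_indices[s:e]])
        if bias is not None:
            out = out + bias  # bias[j] broadcast over rows, as in matmul[i, j] + bias[j]
        return out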