
Commit

Basic tests all pass, fix offset to data buffer.
jroesch committed Mar 17, 2020
1 parent 06eedf4 commit df612a1
Showing 4 changed files with 44 additions and 23 deletions.
20 changes: 11 additions & 9 deletions python/tvm/relay/transform/memory_plan.py
@@ -20,7 +20,7 @@
 """
 import attr
 import numpy as np
-from typing import Optional
+from typing import Optional, Dict

 from ..expr_functor import ExprMutator
 from ..scope_builder import ScopeBuilder
@@ -40,8 +40,9 @@ class Region:
     size: expr.Expr
     alignment: Optional[expr.Expr]
     dtype: Optional[str]
+    offsets: Dict[expr.Var, expr.Expr] = {}

-    def grow(self, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
+    def grow(self, old_storage: expr.Var, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
         if self.dtype:
             assert self.dtype == dtype, "must have matching dtypes in a region"
         else:
@@ -52,10 +53,10 @@ def grow(self, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
         else:
             self.alignment = alignment

-        self.size = self.size + size
+        # Record the offset at which we allocate the storage.
+        self.offsets[old_storage] = self.size

-    def next_offset(self) -> None:
-        return self.size + expr.const(1, dtype="int64")
+        self.size = self.size + size

     def to_expr(self) -> expr.Expr:
         return op.memory.alloc_storage(self.size, self.alignment, self.dtype)
@@ -136,14 +137,14 @@ def process_alloc_storage(self, lhs, call):
         size, alignment = call.args
         dtype = call.attrs.dtype
         region = self.current_region()
-        region.grow(size, alignment, dtype)
+        region.grow(lhs, size, alignment, dtype)
         return lhs, region.var

     def process_alloc_tensor(self, lhs, call):
         region = self.current_region()
-        offset = region.next_offset()
-        _storage, old_offset, shape = call.args
-        assert np.asscalar(old_offset.data.asnumpy()) == 0, "no offsets should yet be allocated"
+        storage, old_offset, shape = call.args
+        offset = region.offsets[storage]
+        assert old_offset.data.asnumpy().item() == 0, "no offsets should yet be allocated"
         return lhs, expr.Call(call.op, [region.var, offset, shape], call.attrs, call.type_args)


@@ -181,6 +182,7 @@ def transform_function(self, func, mod, _):
         func = eval_const(mod, func)
         ea = MemoryPlanPass()
         func = ea.visit(func)
+        print(func)
         return func

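For intuition about the planner change: Region.grow now records the region's current size as the byte offset of each storage allocation, and only afterwards grows the region, replacing the old next_offset scheme. A minimal sketch of that bookkeeping, using plain Python values in place of Relay expressions (the names and sizes here are illustrative only, not the actual pass):

    class Region:
        """Toy model of the memory-plan region."""
        def __init__(self):
            self.size = 0       # bytes reserved so far
            self.offsets = {}   # storage -> byte offset into the region

        def grow(self, storage, size):
            # Record where this storage starts *before* growing, so the
            # offset points at the first byte of its slot.
            self.offsets[storage] = self.size
            self.size += size

    region = Region()
    region.grow("storage_a", 40)   # occupies bytes [0, 40)
    region.grow("storage_b", 24)   # occupies bytes [40, 64)
    assert region.offsets == {"storage_a": 0, "storage_b": 40}
    assert region.size == 64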
26 changes: 19 additions & 7 deletions src/runtime/vm/memory_manager.cc
@@ -76,8 +76,6 @@ inline size_t GetDataAlignment(const DLTensor& arr) {
 }

 NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype) {
-  // TODO(@jroesch): generalize later to non-overlapping allocations.
-  CHECK_EQ(offset, 0u);
   VerifyDataType(dtype);

   // crtical zone: allocate header, cannot throw
@@ -86,14 +84,28 @@ NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype) {
   container->SetDeleter(StorageObj::Deleter);
   size_t needed_size = GetDataSize(container->dl_tensor);
   this->IncRef();
+  // The manager context pointer must continue to point to the storage object
+  // which owns the backing memory, and keeps track of the reference count.
+  //
+  // When we free a container we extract the storage object, decrement its
+  // reference count, then destroy the container, but leave the underlying
+  // buffer intact.
   container->manager_ctx = reinterpret_cast<void*>(this);
-  container->dl_tensor.data = this->buffer.data;
-  NDArray ret(GetObjectPtr<Object>(container));

+  // is this UB?
+  // The only change we make w.r.t offset is modifying the data pointer
+  // of the backing tensor to point into the buffer instead of its start.
+  auto offset_ptr = reinterpret_cast<uint8_t*>(this->buffer.data) + offset;
+  container->dl_tensor.data = reinterpret_cast<void*>(offset_ptr);
+
+  NDArray ret(GetObjectPtr<Object>(container));
   // RAII in effect, now run the check.
-  // TODO(@jroesch): generalize later to non-overlapping allocations.
-  CHECK(needed_size == this->buffer.size)
-      << "size mistmatch required " << needed_size << " found " << this->buffer.size;
+
+  CHECK(offset + needed_size <= this->buffer.size)
+      << "storage allocation failure, attempted to allocate "
+      << needed_size << " at offset "
+      << offset << " in region that is "
+      << this->buffer.size << "bytes";

   return ret;
 }
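The net effect: every tensor handed out by AllocNDArray is now a view at some byte offset into one shared backing buffer, guarded by a bounds check instead of the old offset == 0 restriction. A rough NumPy analogue of that behavior (the helper and names below are illustrative, not TVM API):

    import numpy as np

    def alloc_ndarray(buffer, offset, nbytes):
        # Toy analogue of StorageObj::AllocNDArray: return a view starting
        # `offset` bytes into the shared buffer, after checking that the
        # allocation stays inside the region.
        assert offset + nbytes <= buffer.nbytes, \
            "storage allocation failure, attempted to allocate %d at offset %d " \
            "in region that is %d bytes" % (nbytes, offset, buffer.nbytes)
        return buffer[offset:offset + nbytes]  # a view, not a copy

    region = np.zeros(64, dtype=np.uint8)  # one allocation backs the region
    a = alloc_ndarray(region, 0, 40)       # tensor a lives at bytes [0, 40)
    b = alloc_ndarray(region, 40, 24)      # tensor b lives at bytes [40, 64)
    b[:] = 1
    assert region[40:].all() and not region[:40].any()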
4 changes: 4 additions & 0 deletions src/runtime/vm/vm.cc
@@ -86,13 +86,15 @@ Instruction::Instruction(const Instruction& instr) {
     return;
   case Opcode::AllocTensor:
     this->alloc_tensor.storage = instr.alloc_tensor.storage;
+    this->alloc_tensor.offset = instr.alloc_tensor.offset;
     this->alloc_tensor.ndim = instr.alloc_tensor.ndim;
     this->alloc_tensor.shape = Duplicate<int64_t>(instr.alloc_tensor.shape,
                                                   instr.alloc_tensor.ndim);
     this->alloc_tensor.dtype = instr.alloc_tensor.dtype;
     return;
   case Opcode::AllocTensorReg:
     this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage;
+    this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset;
     this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register;
     this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype;
     return;
@@ -176,13 +178,15 @@ Instruction& Instruction::operator=(const Instruction& instr) {
     return *this;
   case Opcode::AllocTensor:
     this->alloc_tensor.storage = this->alloc_tensor.storage;
+    this->alloc_tensor.offset = instr.alloc_tensor.offset;
     this->alloc_tensor.ndim = instr.alloc_tensor.ndim;
     this->alloc_tensor.shape = Duplicate<int64_t>(instr.alloc_tensor.shape,
                                                   instr.alloc_tensor.ndim);
     this->alloc_tensor.dtype = instr.alloc_tensor.dtype;
     return *this;
   case Opcode::AllocTensorReg:
     this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage;
+    this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset;
     this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register;
     this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype;
     return *this;
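Both the copy constructor and operator= now copy the offset field, so an AllocTensor instruction keeps its placement when instructions are duplicated. At execution time the VM uses that field to carve the tensor out of the storage register. A hypothetical Python sketch of that step, with an invented instruction record and register frame (the real interpreter is the C++ run loop, not this code):

    import numpy as np

    # Hypothetical instruction record: destination register, storage
    # register, byte offset into the storage, and tensor shape/dtype.
    class AllocTensor:
        def __init__(self, dst, storage, offset, shape, dtype):
            self.dst, self.storage, self.offset = dst, storage, offset
            self.shape, self.dtype = shape, dtype

    def exec_alloc_tensor(frame, instr):
        # Toy interpreter step: the tensor is a typed, shaped view into
        # the storage register's flat buffer, starting at instr.offset.
        storage = frame[instr.storage]  # flat uint8 buffer
        nbytes = int(np.prod(instr.shape)) * np.dtype(instr.dtype).itemsize
        raw = storage[instr.offset:instr.offset + nbytes]
        frame[instr.dst] = raw.view(instr.dtype).reshape(instr.shape)

    frame = {0: np.zeros(64, dtype=np.uint8)}
    exec_alloc_tensor(frame, AllocTensor(dst=1, storage=0, offset=16,
                                         shape=(3,), dtype="float32"))
    assert frame[1].shape == (3,)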
17 changes: 10 additions & 7 deletions tests/python/relay/test_memory_passes.py
@@ -44,7 +44,7 @@ def check_memory_plan(func, check_fn):
     py_res = check_fn(*[arg.asnumpy() for arg in args])

     # First check that the two VM results agree.
-    np.testing_assert_allclose(
+    np.testing.assert_allclose(
         no_plan_result.asnumpy(),
         plan_result.asnumpy())

@@ -91,15 +91,18 @@ def test_add_sub():
     func = relay.Function([x, y], z)
     check_memory_plan(func, check_add_sub)

+def check_no_fuse(x, y, w):
+    z = x + y
+    return np.matmul(z, np.transpose(w))
+
 def test_no_fuse():
-    x = relay.var('x', shape=(10,))
-    y = relay.var('y', shape=(10,))
-    w = relay.var('w', shape=(10, 10))
-    z = x + x
-    z = z - y
+    x = relay.var('x', shape=(5, 1))
+    y = relay.var('y', shape=(5, 1))
+    w = relay.var('w', shape=(5, 1))
+    z = x + y
     out = relay.op.nn.dense(z, w)
     func = relay.Function([x, y, w], out)
-    check_memory_plan(func, check_add_sub)
+    check_memory_plan(func, check_no_fuse)

 if __name__ == "__main__":
     test_tyck_alloc_tensor()
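For reference, relay.op.nn.dense(z, w) computes z @ transpose(w), which is what the new check_no_fuse reference mirrors in NumPy. A quick shape check with the test's inputs (standalone NumPy, no Relay required):

    import numpy as np

    def check_no_fuse(x, y, w):
        z = x + y
        return np.matmul(z, np.transpose(w))

    x = np.random.rand(5, 1).astype("float32")
    y = np.random.rand(5, 1).astype("float32")
    w = np.random.rand(5, 1).astype("float32")
    out = check_no_fuse(x, y, w)
    assert out.shape == (5, 5)  # dense of (5, 1) with weight (5, 1) -> (5, 5)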
