
Commit

Basic tests all pass, fix offset to data buffer.
jroesch committed Mar 17, 2020
1 parent 06eedf4 commit df612a1
Showing 4 changed files with 44 additions and 23 deletions.
20 changes: 11 additions & 9 deletions python/tvm/relay/transform/memory_plan.py
@@ -20,7 +20,7 @@
 """
 import attr
 import numpy as np
-from typing import Optional
+from typing import Optional, Dict

 from ..expr_functor import ExprMutator
 from ..scope_builder import ScopeBuilder
@@ -40,8 +40,9 @@ class Region:
     size: expr.Expr
     alignment: Optional[expr.Expr]
     dtype: Optional[str]
+    offsets: Dict[expr.Var, expr.Expr] = {}

-    def grow(self, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
+    def grow(self, old_storage: expr.Var, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
         if self.dtype:
             assert self.dtype == dtype, "must have matching dtypes in a region"
         else:
@@ -52,10 +53,10 @@ def grow(self, size: expr.Expr, alignment: expr.Expr, dtype: str) -> None:
         else:
             self.alignment = alignment

-        self.size = self.size + size
+        # Record the offset at which we allocate the storage.
+        self.offsets[old_storage] = self.size

-    def next_offset(self) -> None:
-        return self.size + expr.const(1, dtype="int64")
+        self.size = self.size + size

     def to_expr(self) -> expr.Expr:
         return op.memory.alloc_storage(self.size, self.alignment, self.dtype)
@@ -136,14 +137,14 @@ def process_alloc_storage(self, lhs, call):
         size, alignment = call.args
         dtype = call.attrs.dtype
         region = self.current_region()
-        region.grow(size, alignment, dtype)
+        region.grow(lhs, size, alignment, dtype)
         return lhs, region.var

     def process_alloc_tensor(self, lhs, call):
         region = self.current_region()
-        offset = region.next_offset()
-        _storage, old_offset, shape = call.args
-        assert np.asscalar(old_offset.data.asnumpy()) == 0, "no offsets should yet be allocated"
+        storage, old_offset, shape = call.args
+        offset = region.offsets[storage]
+        assert old_offset.data.asnumpy().item() == 0, "no offsets should yet be allocated"
         return lhs, expr.Call(call.op, [region.var, offset, shape], call.attrs, call.type_args)


@@ -181,6 +182,7 @@ def transform_function(self, func, mod, _):
         func = eval_const(mod, func)
         ea = MemoryPlanPass()
         func = ea.visit(func)
+        print(func)
         return func

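For intuition about the planner change: Region.grow now records the region's current size as the byte offset of each storage allocation, and only afterwards grows the region, replacing the old next_offset scheme. A minimal sketch of that bookkeeping, using plain Python values in place of Relay expressions (the names and sizes here are illustrative only, not the actual pass):

    class Region:
        """Toy model of the memory-plan region."""
        def __init__(self):
            self.size = 0       # bytes reserved so far
            self.offsets = {}   # storage -> byte offset into the region

        def grow(self, storage, size):
            # Record where this storage starts *before* growing, so the
            # offset points at the first byte of its slot.
            self.offsets[storage] = self.size
            self.size += size

    region = Region()
    region.grow("storage_a", 40)   # occupies bytes [0, 40)
    region.grow("storage_b", 24)   # occupies bytes [40, 64)
    assert region.offsets == {"storage_a": 0, "storage_b": 40}
    assert region.size == 64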
26 changes: 19 additions & 7 deletions src/runtime/vm/memory_manager.cc
@@ -76,8 +76,6 @@ inline size_t GetDataAlignment(const DLTensor& arr) {
 }

 NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype) {
-  // TODO(@jroesch): generalize later to non-overlapping allocations.
-  CHECK_EQ(offset, 0u);
   VerifyDataType(dtype);

   // crtical zone: allocate header, cannot throw
@@ -86,14 +84,28 @@ NDArray StorageObj::AllocNDArray(size_t offset, std::vector<int64_t> shape, DLDataType dtype) {
   container->SetDeleter(StorageObj::Deleter);
   size_t needed_size = GetDataSize(container->dl_tensor);
   this->IncRef();
+  // The manager context pointer must continue to point to the storage object
+  // which owns the backing memory, and keeps track of the reference count.
+  //
+  // When we free a container we extract the storage object, decrement its
+  // reference count, then destroy the container, but leave the underlying
+  // buffer intact.
   container->manager_ctx = reinterpret_cast<void*>(this);
-  container->dl_tensor.data = this->buffer.data;
-  NDArray ret(GetObjectPtr<Object>(container));

+  // is this UB?
+  // The only change we make w.r.t offset is modifying the data pointer
+  // of the backing tensor to point into the buffer instead of its start.
+  auto offset_ptr = reinterpret_cast<uint8_t*>(this->buffer.data) + offset;
+  container->dl_tensor.data = reinterpret_cast<void*>(offset_ptr);
+
+  NDArray ret(GetObjectPtr<Object>(container));
   // RAII in effect, now run the check.
-  // TODO(@jroesch): generalize later to non-overlapping allocations.
-  CHECK(needed_size == this->buffer.size)
-      << "size mistmatch required " << needed_size << " found " << this->buffer.size;
+
+  CHECK(offset + needed_size <= this->buffer.size)
+      << "storage allocation failure, attempted to allocate "
+      << needed_size << " at offset "
+      << offset << " in region that is "
+      << this->buffer.size << "bytes";

   return ret;
 }
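The net effect: every tensor handed out by AllocNDArray is now a view at some byte offset into one shared backing buffer, guarded by a bounds check instead of the old offset == 0 restriction. A rough NumPy analogue of that behavior (the helper and names below are illustrative, not TVM API):

    import numpy as np

    def alloc_ndarray(buffer, offset, nbytes):
        # Toy analogue of StorageObj::AllocNDArray: return a view starting
        # `offset` bytes into the shared buffer, after checking that the
        # allocation stays inside the region.
        assert offset + nbytes <= buffer.nbytes, \
            "storage allocation failure, attempted to allocate %d at offset %d " \
            "in region that is %d bytes" % (nbytes, offset, buffer.nbytes)
        return buffer[offset:offset + nbytes]  # a view, not a copy

    region = np.zeros(64, dtype=np.uint8)  # one allocation backs the region
    a = alloc_ndarray(region, 0, 40)       # tensor a lives at bytes [0, 40)
    b = alloc_ndarray(region, 40, 24)      # tensor b lives at bytes [40, 64)
    b[:] = 1
    assert region[40:].all() and not region[:40].any()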
4 changes: 4 additions & 0 deletions src/runtime/vm/vm.cc
@@ -86,13 +86,15 @@ Instruction::Instruction(const Instruction& instr) {
     return;
   case Opcode::AllocTensor:
     this->alloc_tensor.storage = instr.alloc_tensor.storage;
+    this->alloc_tensor.offset = instr.alloc_tensor.offset;
     this->alloc_tensor.ndim = instr.alloc_tensor.ndim;
     this->alloc_tensor.shape = Duplicate<int64_t>(instr.alloc_tensor.shape,
                                                   instr.alloc_tensor.ndim);
     this->alloc_tensor.dtype = instr.alloc_tensor.dtype;
     return;
   case Opcode::AllocTensorReg:
     this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage;
+    this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset;
     this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register;
     this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype;
     return;
@@ -176,13 +178,15 @@ Instruction& Instruction::operator=(const Instruction& instr) {
     return *this;
   case Opcode::AllocTensor:
     this->alloc_tensor.storage = this->alloc_tensor.storage;
+    this->alloc_tensor.offset = instr.alloc_tensor.offset;
     this->alloc_tensor.ndim = instr.alloc_tensor.ndim;
     this->alloc_tensor.shape = Duplicate<int64_t>(instr.alloc_tensor.shape,
                                                   instr.alloc_tensor.ndim);
     this->alloc_tensor.dtype = instr.alloc_tensor.dtype;
     return *this;
   case Opcode::AllocTensorReg:
     this->alloc_tensor_reg.storage = instr.alloc_tensor_reg.storage;
+    this->alloc_tensor_reg.offset = instr.alloc_tensor_reg.offset;
     this->alloc_tensor_reg.shape_register = instr.alloc_tensor_reg.shape_register;
     this->alloc_tensor_reg.dtype = instr.alloc_tensor_reg.dtype;
     return *this;
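Both the copy constructor and operator= now copy the offset field, so an AllocTensor instruction keeps its placement when instructions are duplicated. At execution time the VM uses that field to carve the tensor out of the storage register. A hypothetical Python sketch of that step, with an invented instruction record and register frame (the real interpreter is the C++ run loop, not this code):

    import numpy as np

    # Hypothetical instruction record: destination register, storage
    # register, byte offset into the storage, and tensor shape/dtype.
    class AllocTensor:
        def __init__(self, dst, storage, offset, shape, dtype):
            self.dst, self.storage, self.offset = dst, storage, offset
            self.shape, self.dtype = shape, dtype

    def exec_alloc_tensor(frame, instr):
        # Toy interpreter step: the tensor is a typed, shaped view into
        # the storage register's flat buffer, starting at instr.offset.
        storage = frame[instr.storage]  # flat uint8 buffer
        nbytes = int(np.prod(instr.shape)) * np.dtype(instr.dtype).itemsize
        raw = storage[instr.offset:instr.offset + nbytes]
        frame[instr.dst] = raw.view(instr.dtype).reshape(instr.shape)

    frame = {0: np.zeros(64, dtype=np.uint8)}
    exec_alloc_tensor(frame, AllocTensor(dst=1, storage=0, offset=16,
                                         shape=(3,), dtype="float32"))
    assert frame[1].shape == (3,)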
17 changes: 10 additions & 7 deletions tests/python/relay/test_memory_passes.py
@@ -44,7 +44,7 @@ def check_memory_plan(func, check_fn):
     py_res = check_fn(*[arg.asnumpy() for arg in args])

     # First check that the two VM results agree.
-    np.testing_assert_allclose(
+    np.testing.assert_allclose(
         no_plan_result.asnumpy(),
         plan_result.asnumpy())

@@ -91,15 +91,18 @@ def test_add_sub():
     func = relay.Function([x, y], z)
     check_memory_plan(func, check_add_sub)

+def check_no_fuse(x, y, w):
+    z = x + y
+    return np.matmul(z, np.transpose(w))
+
 def test_no_fuse():
-    x = relay.var('x', shape=(10,))
-    y = relay.var('y', shape=(10,))
-    w = relay.var('w', shape=(10, 10))
-    z = x + x
-    z = z - y
+    x = relay.var('x', shape=(5, 1))
+    y = relay.var('y', shape=(5, 1))
+    w = relay.var('w', shape=(5, 1))
+    z = x + y
     out = relay.op.nn.dense(z, w)
     func = relay.Function([x, y, w], out)
-    check_memory_plan(func, check_add_sub)
+    check_memory_plan(func, check_no_fuse)

 if __name__ == "__main__":
     test_tyck_alloc_tensor()
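For reference, relay.op.nn.dense(z, w) computes z @ transpose(w), which is what the new check_no_fuse reference mirrors in NumPy. A quick shape check with the test's inputs (standalone NumPy, no Relay required):

    import numpy as np

    def check_no_fuse(x, y, w):
        z = x + y
        return np.matmul(z, np.transpose(w))

    x = np.random.rand(5, 1).astype("float32")
    y = np.random.rand(5, 1).astype("float32")
    w = np.random.rand(5, 1).astype("float32")
    out = check_no_fuse(x, y, w)
    assert out.shape == (5, 5)  # dense of (5, 1) with weight (5, 1) -> (5, 5)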
