From 7be605fe0a8b7295664309ee5019c5e7e30b4771 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 16 Sep 2017 19:28:00 -0700 Subject: [PATCH] [TOP] GraphExecutor (#11) --- nnvm/Makefile | 9 +- nnvm/amalgamation/.gitignore | 2 - nnvm/amalgamation/Makefile | 32 --- nnvm/amalgamation/amalgamation.py | 100 --------- nnvm/amalgamation/generate.py | 18 -- nnvm/deploy/REAMD.md | 4 + nnvm/deploy/nnvm_runtime.cc | 11 + nnvm/example/src/operator.cc | 196 ----------------- nnvm/include/nnvm/node.h | 2 - nnvm/include/nnvm/tuple.h | 6 +- nnvm/src/README.md | 14 ++ nnvm/src/runtime/graph_executor.cc | 334 +++++++++++++++++++++++++++++ nnvm/src/runtime/graph_executor.h | 150 +++++++++++++ nnvm/src/top/README.md | 2 - 14 files changed, 523 insertions(+), 357 deletions(-) delete mode 100644 nnvm/amalgamation/.gitignore delete mode 100644 nnvm/amalgamation/Makefile delete mode 100644 nnvm/amalgamation/amalgamation.py delete mode 100644 nnvm/amalgamation/generate.py create mode 100644 nnvm/deploy/REAMD.md create mode 100644 nnvm/deploy/nnvm_runtime.cc delete mode 100644 nnvm/example/src/operator.cc create mode 100644 nnvm/src/README.md create mode 100644 nnvm/src/runtime/graph_executor.cc create mode 100644 nnvm/src/runtime/graph_executor.h delete mode 100644 nnvm/src/top/README.md diff --git a/nnvm/Makefile b/nnvm/Makefile index 009e3f9eeec8..868ab48eea38 100644 --- a/nnvm/Makefile +++ b/nnvm/Makefile @@ -11,6 +11,7 @@ include $(config) export LDFLAGS = -pthread -lm export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC +CFLAGS += -Itvm/include -Itvm/dlpack/include ifdef DMLC_CORE_PATH CFLAGS += -I$(DMLC_CORE_PATH)/include @@ -51,10 +52,10 @@ else NO_WHOLE_ARCH= --no-whole-archive endif -all: lib/libnnvm.a lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX) +all: lib/libnnvm.a lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX) lib/libnnvm_top_runtime.$(SHARED_LIBRARY_SUFFIX) SRC = $(wildcard src/*.cc src/c_api/*.cc src/core/*.cc src/pass/*.cc) -SRC_TOP = $(wildcard src/top/*.cc, src/top/*/*.cc) +SRC_TOP = $(wildcard src/top/*.cc, src/top/*/*.cc src/runtime/*.cc) ALL_OBJ = $(patsubst %.cc, build/%.o, $(SRC)) TOP_OBJ = $(patsubst %.cc, build/%.o, $(SRC_TOP)) ALL_DEP = $(ALL_OBJ) @@ -76,6 +77,10 @@ lib/libnnvm_top.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ} @mkdir -p $(@D) $(CXX) $(CFLAGS) -shared -o $@ $(filter %.o, $^) $(LDFLAGS) -Wl,${WHOLE_ARCH} lib/libnnvm.a -Wl,${NO_WHOLE_ARCH} +lib/libnnvm_top_runtime.$(SHARED_LIBRARY_SUFFIX): deploy/nnvm_runtime.cc + @mkdir -p $(@D) + $(CXX) $(CFLAGS) -shared -o $@ $(filter %.cc, $^) $(LDFLAGS) + cython: cd python; python setup.py build_ext --inplace diff --git a/nnvm/amalgamation/.gitignore b/nnvm/amalgamation/.gitignore deleted file mode 100644 index e808ea2764c3..000000000000 --- a/nnvm/amalgamation/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -nnvm.d -nnvm.cc diff --git a/nnvm/amalgamation/Makefile b/nnvm/amalgamation/Makefile deleted file mode 100644 index 1f286f055237..000000000000 --- a/nnvm/amalgamation/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -export NNVM_ROOT=`pwd`/.. -export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC - -ifdef DMLC_CORE_PATH - CFLAGS += -I$(DMLC_CORE_PATH)/include -else - CFLAGS += -I$(CURDIR)/../dmlc-core/include -endif - -.PHONY: all clean - -all: libnnvm.a - -nnvm.cc: - python generate.py $@ - -nnvm.d: nnvm.cc - ${CXX} ${CFLAGS} -M -MT nnvm.o \ - -I ${NNVM_ROOT}/ -I ${NNVM_ROOT}/include \ - -D__MIN__=$(MIN) $+ > nnvm.d - -nnvm-all.cc: nnvm.d nnvm.cc - python ./amalgamation.py $+ $@ - -nnvm-all.o: nnvm-all.cc - ${CXX} ${CFLAGS} -fPIC -o $@ -c $+ - -libnnvm.a: nnvm-all.o - ar rcs $@ $+ - -clean: - rm -f *.d *.o *.so *.a nnvm-all.cc nnvm.cc diff --git a/nnvm/amalgamation/amalgamation.py b/nnvm/amalgamation/amalgamation.py deleted file mode 100644 index 310daa9d68e0..000000000000 --- a/nnvm/amalgamation/amalgamation.py +++ /dev/null @@ -1,100 +0,0 @@ -import sys -import os.path, re, StringIO - -blacklist = [ - 'Windows.h', - 'mach/clock.h', 'mach/mach.h', - 'malloc.h', - 'glog/logging.h', 'io/azure_filesys.h', 'io/hdfs_filesys.h', 'io/s3_filesys.h', - 'sys/stat.h', 'sys/types.h', - 'omp.h', 'execinfo.h', 'packet/sse-inl.h' - ] - - -def get_sources(def_file): - sources = [] - files = [] - visited = set() - mxnet_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)) - for line in open(def_file): - files = files + line.strip().split(' ') - - for f in files: - f = f.strip() - if not f or f.endswith('.o:') or f == '\\': continue - fn = os.path.relpath(f) - if os.path.abspath(f).startswith(mxnet_path) and fn not in visited: - sources.append(fn) - visited.add(fn) - return sources - -sources = get_sources(sys.argv[1]) - -def find_source(name, start): - candidates = [] - for x in sources: - if x == name or x.endswith('/' + name): candidates.append(x) - if not candidates: return '' - if len(candidates) == 1: return candidates[0] - for x in candidates: - if x.split('/')[1] == start.split('/')[1]: return x - return '' - - -re1 = re.compile('<([./a-zA-Z0-9_-]*)>') -re2 = re.compile('"([./a-zA-Z0-9_-]*)"') - -sysheaders = [] -history = set([]) -out = StringIO.StringIO() - -def expand(x, pending): - if x in history and x not in ['mshadow/mshadow/expr_scalar-inl.h']: # MULTIPLE includes - return - - if x in pending: - #print 'loop found: %s in ' % x, pending - return - - print >>out, "//===== EXPANDING: %s =====\n" %x - for line in open(x): - if line.find('#include') < 0: - out.write(line) - continue - if line.strip().find('#include') > 0: - print line - continue - m = re1.search(line) - if not m: m = re2.search(line) - if not m: - print line + ' not found' - continue - h = m.groups()[0].strip('./') - source = find_source(h, x) - if not source: - if (h not in blacklist and - h not in sysheaders and - 'mkl' not in h and - 'nnpack' not in h): sysheaders.append(h) - else: - expand(source, pending + [x]) - print >>out, "//===== EXPANDED: %s =====\n" %x - history.add(x) - - -expand(sys.argv[2], []) - -f = open(sys.argv[3], 'wb') - - - -for k in sorted(sysheaders): - print >>f, "#include <%s>" % k - -print >>f, '' -print >>f, out.getvalue() - -for x in sources: - if x not in history and not x.endswith('.o'): - print 'Not processed:', x - diff --git a/nnvm/amalgamation/generate.py b/nnvm/amalgamation/generate.py deleted file mode 100644 index 84a5fc06fb03..000000000000 --- a/nnvm/amalgamation/generate.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import sys - -FOLDERS = ["core", "pass", "c_api"] - -fo = open(sys.argv[1], "w") - - - -for folder in FOLDERS: - path = str(os.path.join("../src", folder)) - flst = os.listdir(path) - for f in flst: - if f.endswith(".cc") == True: - fo.write('#include "' + str(os.path.join("src", folder, f)) + '"\n') - - -fo.close() diff --git a/nnvm/deploy/REAMD.md b/nnvm/deploy/REAMD.md new file mode 100644 index 000000000000..96ab18d7514b --- /dev/null +++ b/nnvm/deploy/REAMD.md @@ -0,0 +1,4 @@ +All in One Deployment File +========================== +This folder contains an all in one deployment file that contains minimum dependencies +needed to run nnvm top runtime. \ No newline at end of file diff --git a/nnvm/deploy/nnvm_runtime.cc b/nnvm/deploy/nnvm_runtime.cc new file mode 100644 index 000000000000..15c46012f6ec --- /dev/null +++ b/nnvm/deploy/nnvm_runtime.cc @@ -0,0 +1,11 @@ +/*! + * Copyright (c) 2017 by Contributors + * All in one runtime + * \file nnvm_runtime.cc + */ +#include "../src/core/graph.cc" +#include "../src/core/node.cc" +#include "../src/core/pass.cc" +#include "../src/core/op.cc" +#include "../src/pass/saveload_json.cc" +#include "../src/runtime/graph_executor.cc" diff --git a/nnvm/example/src/operator.cc b/nnvm/example/src/operator.cc deleted file mode 100644 index 34e4529ecb0b..000000000000 --- a/nnvm/example/src/operator.cc +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2016 by Contributors -// This is an example on how we can register operator information to NNVM -// these operator information are used to support various graph building and optimizations -// see tests/python/ folder for the test-cases that uses these information. - -#include -#include -#include -#include -#include -#include - -namespace myproject { - -using nnvm::FListInputNames; -using nnvm::FMutateInputs; -using nnvm::FInferShape; -using nnvm::FInferType; -using nnvm::FInplaceOption; -using nnvm::Node; -using nnvm::NodePtr; -using nnvm::NodeEntry; -using nnvm::FGradient; -using nnvm::NodeAttrs; -using nnvm::TShape; -using nnvm::array_view; - -// simply return the shape as same -inline bool SameShape(const NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { - if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false; - for (TShape& pshape : *oshape) { - pshape = (*ishape)[0]; - } - for (TShape& pshape : *ishape) { - pshape = (*ishape)[0]; - } - return true; -} - -inline std::vector > InplaceIn0Out0(const NodeAttrs& attrs) { - return {{0, 0}}; -} - -// quick helper to make node -inline NodeEntry MakeNode(const char* op_name, - std::string node_name, - std::vector inputs) { - NodePtr p = Node::Create(); - p->attrs.op = nnvm::Op::Get(op_name); - p->attrs.name = std::move(node_name); - p->inputs = std::move(inputs); - return NodeEntry{p, 0, 0}; -} - -// simple demonstration of reshape. -NNVM_REGISTER_OP(reshape) -.describe("reshape source to target shape") -.set_num_inputs(1) -.set_attr_parser( - [](NodeAttrs* attrs) { - // parse attr parser to get target attribute - TShape target; - std::istringstream is(attrs->dict.at("target")); - CHECK(is >> target); - attrs->parsed = std::move(target); - }) -.set_attr( - "FInferShape", [] (const NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { - // get parsed attribute - const TShape& target = nnvm::get(attrs.parsed); - (*oshape)[0] = target; - if ((*ishape)[0].ndim() == 0) return false; - CHECK_EQ((*ishape)[0].Size(), target.Size()) - << "Reshape op: source target shape mismatch"; - return true; - }) -.set_attr("FInplaceOption", InplaceIn0Out0); - - -NNVM_REGISTER_OP(cast) -.describe("cast source type to target") -.set_num_inputs(1) -.include("ElementwiseOpAttr") -.set_attr_parser( - [](NodeAttrs* attrs) { - // parse attr parser to get target attribute - int dtype; - std::istringstream is(attrs->dict.at("dtype")); - CHECK(is >> dtype); - attrs->parsed = std::move(dtype); - }) -.set_attr( - "FInferType", [](const NodeAttrs& attrs, - std::vector *itype, - std::vector *otype) { - (*otype)[0] = nnvm::get(attrs.parsed); - return true; - }); - -NNVM_REGISTER_OP(identity) -.describe("identity function") -.set_num_inputs(1) -.include("ElementwiseOpAttr") -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ograds[0]}; - }); - -NNVM_REGISTER_OP(add) -.describe("add two data together") -.set_num_inputs(2) -.add_alias("__add_symbol__") -.include("ElementwiseOpAttr") -.set_attr("FInplaceOption", InplaceIn0Out0) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ograds[0], ograds[0]}; - }); - -NNVM_REGISTER_OP(mul) -.describe("multiply two data together") -.set_num_inputs(2) -.include("ElementwiseOpAttr") -.set_attr("FInferShape", SameShape) -.set_attr("FInplaceOption", InplaceIn0Out0) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ - MakeNode("mul", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[1]}), - MakeNode("mul", n->attrs.name + "_grad_1", - {ograds[0], n->inputs[0]}) - }; - }); - -NNVM_REGISTER_OP(__ewise_sum__) -.describe("elementwise sum") -.set_num_inputs(nnvm::kVarg); - -NNVM_REGISTER_OP(__zero__) -.describe("set output to zero") -.set_num_inputs(0); - -NNVM_REGISTER_OP(__one__) -.describe("set output to one") -.set_num_inputs(0); - -NNVM_REGISTER_OP(cross_device_copy) -.describe("Copy data across device.") -.set_num_inputs(1) -.set_attr("FInferShape", SameShape); - - -NNVM_REGISTER_OP(conv2d) -.describe("take conv of input") -.set_num_inputs(2) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "weight"}; - }); - -NNVM_REGISTER_OP(add) -.set_attr("nick_name", "plus"); - -NNVM_REGISTER_OP(assign) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr("FMutateInputs", [](const NodeAttrs& attrs) { - return std::vector{0}; - }); - -NNVM_REGISTER_OP_GROUP(ElementwiseOpAttr) -.set_attr("FInferShape", SameShape); - - -NNVM_REGISTER_OP(exp) -.describe("take exponential") -.set_num_inputs(1) -.include("ElementwiseOpAttr") -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("mul", n->attrs.name + "_grad", - {ograds[0], NodeEntry{n, 0, 0}}) - }; - }); - - -} // namespace myproject diff --git a/nnvm/include/nnvm/node.h b/nnvm/include/nnvm/node.h index 54f31e745c03..0e46e23f25e9 100644 --- a/nnvm/include/nnvm/node.h +++ b/nnvm/include/nnvm/node.h @@ -81,8 +81,6 @@ struct NodeAttrs { const Op *op{nullptr}; /*! \brief name of the node */ std::string name; - /*! \brief Vector representation of positional attributes */ - std::vector scalars; /*! \brief The dictionary representation of attributes */ std::unordered_map dict; /*! diff --git a/nnvm/include/nnvm/tuple.h b/nnvm/include/nnvm/tuple.h index b2d049535de9..b3193ca29eb8 100644 --- a/nnvm/include/nnvm/tuple.h +++ b/nnvm/include/nnvm/tuple.h @@ -195,7 +195,7 @@ class Tuple { * \return the ostream */ friend std::ostream &operator<<(std::ostream &os, const Tuple &t) { - os << '('; + os << '['; const ValueType* begin = t.begin(); const ValueType* end = t.end(); for (const ValueType* it = begin; it != end; ++it) { @@ -204,7 +204,7 @@ class Tuple { } // python style tuple if (t.ndim() == 1) os << ','; - os << ')'; + os << ']'; return os; } /*! @@ -235,7 +235,7 @@ class Tuple { while (isspace(is.peek())) { is.get(); } - if (is.peek() == ')') { + if (is.peek() == ')' || is.peek() == ']') { is.get(); return is; } diff --git a/nnvm/src/README.md b/nnvm/src/README.md new file mode 100644 index 000000000000..da3584a73cb1 --- /dev/null +++ b/nnvm/src/README.md @@ -0,0 +1,14 @@ +Project Structure +================= + +The following components are operator invariant. + +- c_api: NNVM C API +- core: NNVM core data structure +- pass: NNVM pass + +The following components are generic graph compiler for NNVM-TOP + +- top: NNVM-TOP core operator defs +- tvm: NNVM-TOP to TVM compiler toolchain +- runtime: NNVM-TOP runtime diff --git a/nnvm/src/runtime/graph_executor.cc b/nnvm/src/runtime/graph_executor.cc new file mode 100644 index 000000000000..5bdf1979dfa8 --- /dev/null +++ b/nnvm/src/runtime/graph_executor.cc @@ -0,0 +1,334 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file graph_executor.cc + */ +#include +#include +#include +#include "./graph_executor.h" + +namespace nnvm { +namespace runtime { + +/*! \brief macro to do C API call */ +#define TVM_CCALL(func) \ + { \ + int ret = (func); \ + CHECK_EQ(ret, 0) \ + << TVMGetLastError(); \ + } + +using ::tvm::runtime::PackedFunc; +using ::tvm::runtime::TVMArgs; +using ::tvm::runtime::TVMRetValue; + +PackedFunc GraphExecutor::GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) { + // return member functions during query. + if (name == "set_input") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + if (args[0].type_code() == kStr) { + this->SetInput(this->GetInputIndex(args[0]), args[1]); + } else { + this->SetInput(args[0], args[1]); + } + }); + } else if (name == "get_output") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + this->GetOutput(args[0], args[1]); + }); + } else if (name == "run") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + this->Run(); + }); + } else if (name == "load_params") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + this->LoadParams(args[0].operator std::string()); + }); + } else { + return PackedFunc(); + } +} + +GraphExecutor::~GraphExecutor() { + for (DLTensor* t : storage_pool_) { + TVM_CCALL(TVMArrayFree(t)); + } +} + +void GraphExecutor::Run() { + // setup the array and requirements. + for (size_t i = 0; i < op_execs_.size(); ++i) { + if (op_execs_[i]) op_execs_[i](); + } +} + +void GraphExecutor::Init(Graph graph, + tvm::runtime::Module module, + TVMContext ctx) { + graph_ = std::move(graph); + module_ = std::move(module); + ctx_ = ctx; + this->SetupStorage(); + this->SetupOpExecs(); +} + +int GraphExecutor::GetInputIndex(const std::string& name) { + const auto& idx = graph_.indexed_graph(); + for (size_t i = 0; i< idx.input_nodes().size(); ++i) { + if (idx[idx.input_nodes()[i]].source->attrs.name == name) { + return static_cast(i); + } + } + LOG(FATAL) << "cannot find " << name << " among input"; + return -1; +} + +void GraphExecutor::SetInput(int index, DLTensor* data_in) { + const auto& idx = graph_.indexed_graph(); + CHECK_LT(static_cast(index), idx.input_nodes().size()); + uint32_t eid = idx.entry_id(idx.input_nodes()[index], 0); + TVM_CCALL(TVMArrayCopyFromTo(data_in, &data_entry_[eid], nullptr)); +} + +void GraphExecutor::GetOutput(int index, DLTensor* data_out) { + const auto& idx = graph_.indexed_graph(); + CHECK_LT(static_cast(index), idx.outputs().size()); + uint32_t eid = idx.entry_id(idx.outputs()[index]); + TVM_CCALL(TVMArrayCopyFromTo(&data_entry_[eid], data_out, nullptr)); +} + +bool LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor) { + uint64_t header, reserved; + CHECK(strm->Read(&header, sizeof(header))) + << "Invalid DLTensor file format"; + CHECK(strm->Read(&reserved, sizeof(reserved))) + << "Invalid DLTensor file format"; + CHECK(header == kTVMNDArrayMagic) + << "Invalid DLTensor file format"; + + CHECK(strm->Read(&tensor->ctx, sizeof(tensor->ctx))) + << "Invalid DLTensor file format"; + CHECK(strm->Read(&tensor->ndim, sizeof(tensor->ndim))) + << "Invalid DLTensor file format"; + CHECK(strm->Read(&tensor->dtype, sizeof(tensor->dtype))) + << "Invalid DLTensor file format"; + + int ndim = tensor->ndim; + CHECK(strm->Read(tensor->shape, sizeof(int64_t) * ndim)) + << "Invalid DLTensor file format"; + + int64_t size = 1; + int type_size = tensor->dtype.bits / 8; + for (int i = 0; i < ndim; ++i) { + size *= tensor->shape[i]; + } + int64_t data_byte_size; + CHECK(strm->Read(&data_byte_size, sizeof(data_byte_size))) + << "Invalid DLTensor file format"; + CHECK(data_byte_size == type_size * size) + << "Invalid DLTensor file format"; + CHECK(strm->Read(tensor->data, type_size * size)) + << "Invalid DLTensor file format"; + return true; +} + +void GraphExecutor::LoadParams(dmlc::Stream* strm) { + uint64_t header, reserved; + CHECK(strm->Read(&header)) + << "Invalid parameters file format"; + CHECK(header == kTVMNDArrayListMagic) + << "Invalid parameters file format"; + CHECK(strm->Read(&reserved)) + << "Invalid parameters file format"; + + std::vector names; + CHECK(strm->Read(&names)) + << "Invalid parameters file format"; + + std::unordered_map name_eid; + const auto& idx = graph_.indexed_graph(); + for (int nid : idx.input_nodes()) { + name_eid.emplace(idx[nid].source->attrs.name, idx.entry_id(nid, 0)); + } + + uint64_t sz; + strm->Read(&sz, sizeof(sz)); + size_t size = static_cast(sz); + CHECK(size == names.size()) + << "Invalid parameters file format"; + for (size_t i = 0; i < size; ++i) { + auto iter = name_eid.find(names[i]); + CHECK(iter != name_eid.end()); + CHECK(LoadDLTensor(strm, &data_entry_[iter->second])) + << "Invalid parameters file format"; + } +} + +void GraphExecutor::LoadParams(const std::string& param_blob) { + dmlc::MemoryStringStream strm(const_cast(¶m_blob)); + this->LoadParams(&strm); +} + +void GraphExecutor::SetupStorage() { + const auto& idx = graph_.indexed_graph(); + // Grab saved optimization plan from graph. + auto vstorage = graph_.MoveCopyAttr("storage_id"); + std::vector vtype; + for (const std::string& s_type : + graph_.GetAttr >("dltype")) { + vtype.push_back(tvm::runtime::String2TVMType(s_type)); + } + data_shape_ = graph_.GetAttr("shape"); + data_entry_.resize(idx.num_node_entries()); + // Find the maximum space size. + int max_id = 0; + for (size_t i = 0; i < data_shape_.size(); ++i) { + max_id = std::max(vstorage[i] + 1, max_id); + } + for (const auto& e : idx.input_nodes()) { + vstorage[idx.entry_id(e, 0)] = max_id++; + } + // size of each storage pool entry + std::vector pool_entry_bytes; + // Find the maximum space size. + for (size_t i = 0; i < data_shape_.size(); ++i) { + int storage_id = vstorage[i]; + size_t size = data_shape_[i].Size(); + CHECK_GE(storage_id, 0) << "Do not support runtime shape op"; + DLDataType t = vtype[i]; + size_t bits = t.bits * t.lanes; + CHECK_EQ(bits % 8U, 0U); + size_t bytes = (bits / 8U) * size; + + size_t sid = static_cast(storage_id); + if (sid >= pool_entry_bytes.size()) { + pool_entry_bytes.resize(sid + 1, 0); + } + pool_entry_bytes[sid] = std::max(pool_entry_bytes[sid], bytes); + } + // Allocate the space. + for (size_t i = 0; i < pool_entry_bytes.size(); ++i) { + TShape shape{static_cast(pool_entry_bytes[i] + 3) / 4}; + DLTensor* tensor; + TVM_CCALL(TVMArrayAlloc( + shape.data(), 1, kFloat, 32, 1, ctx_.device_type, ctx_.device_id, &tensor)); + storage_pool_.push_back(tensor); + } + // Assign the pooled entries. + for (size_t i = 0; i < data_entry_.size(); ++i) { + int storage_id = vstorage[i]; + data_entry_[i] = *storage_pool_[storage_id]; + data_entry_[i].shape = const_cast(data_shape_[i].data()); + data_entry_[i].ndim = data_shape_[i].ndim(); + data_entry_[i].dtype = vtype[i]; + } +} + +void GraphExecutor::SetupOpExecs() { + static const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op"); + const auto& idx = graph_.indexed_graph(); + op_execs_.resize(idx.num_nodes()); + // setup the array and requirements. + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + if (inode.source->is_variable()) continue; + std::vector args; + for (const auto& e : inode.inputs) { + args.push_back(data_entry_[idx.entry_id(e)]); + } + for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { + uint32_t eid = idx.entry_id(nid, index); + args.push_back(data_entry_[eid]); + } + CHECK_EQ(inode.source->op(), tvm_op) + << "transform the graph to tvm op"; + op_execs_[nid] = CreateTVMOp( + inode.source->attrs, args, inode.inputs.size()); + } +} + +std::function GraphExecutor::CreateTVMOp( + const nnvm::NodeAttrs& attrs, + const std::vector& args, + size_t num_inputs) { + struct OpArgs { + std::vector args; + std::vector arg_values; + std::vector arg_tcodes; + std::vector shape_data; + }; + const TVMOpParam& param = nnvm::get(attrs.parsed); + std::shared_ptr arg_ptr = std::make_shared(); + // setup address. + arg_ptr->args = std::move(args); + if (param.flatten_data) { + arg_ptr->shape_data.resize(arg_ptr->args.size()); + } + for (size_t i = 0; i < arg_ptr->args.size(); ++i) { + TVMValue v; + DLTensor* t = &(arg_ptr->args[i]); + v.v_handle = t; + arg_ptr->arg_values.push_back(v); + arg_ptr->arg_tcodes.push_back(kArrayHandle); + if (param.flatten_data) { + arg_ptr->shape_data[i] = std::accumulate( + t->shape, t->shape + t->ndim, 1, std::multiplies()); + t->ndim = 1; + t->shape = &(arg_ptr->shape_data[i]); + } + } + // get compiled function from module. + tvm::runtime::PackedFunc pf = module_.GetFunction(param.func_name, false); + CHECK(pf != nullptr) << "no such function in module: " << param.func_name; + auto fexec = [arg_ptr, pf] () { + TVMRetValue rv; + TVMArgs targs(arg_ptr->arg_values.data(), + arg_ptr->arg_tcodes.data(), + static_cast(arg_ptr->arg_values.size())); + pf.CallPacked(targs, &rv); + }; + return fexec; +} + +// parser +inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) { + TVMOpParam param; + param.Init(attrs->dict); + attrs->parsed = std::move(param); +} + +DMLC_REGISTER_PARAMETER(TVMOpParam); + +NNVM_REGISTER_OP(tvm_op) +.set_attr_parser(TVMOpParamParser) +.set_num_inputs([](const NodeAttrs& attrs) { + const TVMOpParam& param = nnvm::get(attrs.parsed); + return param.num_inputs; + }) +.set_num_outputs([](const NodeAttrs& attrs) { + const TVMOpParam& param = nnvm::get(attrs.parsed); + return param.num_outputs; + }); + +TVM_REGISTER_GLOBAL("nnvm.tvm.create_executor") +.set_body([](TVMArgs args, TVMRetValue *rv) { + std::string sym_json = args[0]; + std::string param_blob = args[1]; + tvm::runtime::Module m = args[2]; + TVMContext ctx; + ctx.device_type = static_cast(args[3].operator int()); + ctx.device_id = args[4]; + // load graph from json string + nnvm::Graph g; + g.attrs["json"] = std::make_shared(sym_json); + g = nnvm::ApplyPass(std::move(g), "LoadJSON"); + std::shared_ptr exec = std::make_shared(); + exec->Init(g, m, ctx); + // load params form stream of string + exec->LoadParams(std::move(param_blob)); + *rv = tvm::runtime::Module(exec); + }); +} // namespace runtime +} // namespace nnvm diff --git a/nnvm/src/runtime/graph_executor.h b/nnvm/src/runtime/graph_executor.h new file mode 100644 index 000000000000..243c71646ba7 --- /dev/null +++ b/nnvm/src/runtime/graph_executor.h @@ -0,0 +1,150 @@ +/*! + * Copyright (c) 2017 by Contributors + * + * Runtime module for graph deployment. + * + * \file graph_executor.h + */ +#ifndef NNVM_RUNTIME_GRAPH_EXECUTOR_H_ +#define NNVM_RUNTIME_GRAPH_EXECUTOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nnvm { +namespace runtime { + +/*! \brief Magic number for NDArray file */ +constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F; +/*! \brief Magic number for NDArray list file */ +constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; + +/*! \brief DLPack compatible data types */ +using DLTypeVector = std::vector; + +/*! \brief operator attributes about tvm op */ +struct TVMOpParam : public dmlc::Parameter { + std::string func_name; + uint32_t num_inputs; + uint32_t num_outputs; + bool flatten_data; + + DMLC_DECLARE_PARAMETER(TVMOpParam) { + DMLC_DECLARE_FIELD(func_name); + DMLC_DECLARE_FIELD(num_inputs).set_default(1); + DMLC_DECLARE_FIELD(num_outputs).set_default(1); + DMLC_DECLARE_FIELD(flatten_data).set_default(false); + } +}; + +/*! + * \brief TVM Graph Executor. + * This is a minimum graph executor, embedded in TVM runtime + * without any framework dependency. + * + * This runtime can be acccesibly in various language via + * TVM runtime PackedFunc API. + */ +class GraphExecutor : public ::tvm::runtime::ModuleNode { + public: + /*! + * \return The type key of the executor. + */ + const char* type_key() const final { + return "GraphExecutor"; + } + /*! + * \brief Get member function to front-end + * \param name The name of the function. + * \param sptr_to_self The pointer to the module node. + * \return The corresponding member function. + */ + tvm::runtime::PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final; + /*! \brief destructor */ + ~GraphExecutor(); + /*! + * \brief Initialize the graph executor with graph and context. + * \param graph The execution graph. + * \param module The module containing the compiled functions. + * \param ctx The context where the graph should sit on + */ + void Init(Graph graph, + tvm::runtime::Module module, + TVMContext ctx); + /*! + * \brief Get the input index given the name of input. + * \param name The name of the input. + * \return The index of input. + */ + int GetInputIndex(const std::string& name); + /*! + * \brief set index-th input to the graph. + * \param index The input index. + * \param data The input data. + */ + void SetInput(int index, DLTensor* data); + /*! + * \brief Copy index-th output to data_out. + * \param index The output index. + * \param data_out the output data. + */ + void GetOutput(int index, DLTensor* data_out); + /*! + * \brief Load parameters from binary stream + * \param strm The input stream. + */ + void LoadParams(dmlc::Stream* strm); + /*! + * \brief Load parameters from parameter blob. + * \param param_blob A binary blob of parameter. + */ + void LoadParams(const std::string& param_blob); + /*! + * \brief Execute the graph, update output. + */ + void Run(); + + private: + /*! \brief Setup the temporal storage */ + void SetupStorage(); + /*! \brief Setup the executors */ + void SetupOpExecs(); + /*! + * \brief Create a executtion function given input. + * \param attrs The node attributes + * \param args The arguments to the functor, including inputs and outputs. + * \param num_inputs Number of inputs + * \return The created executor. + */ + std::function CreateTVMOp(const NodeAttrs& attrs, + const std::vector& args, + size_t num_inputs); + /*! \brief The graph */ + Graph graph_; + /*! \brief The code module */ + tvm::runtime::Module module_; + /*! \brief execution context */ + TVMContext ctx_; + /*! \brief common storage pool */ + std::vector storage_pool_; + /*! \brief data shape of each node entry */ + std::vector data_shape_; + /*! \brief data entry of each node */ + std::vector data_entry_; + /*! \brief operator on each node */ + std::vector > op_execs_; +}; + +} // namespace runtime +} // namespace nnvm + +#endif // NNVM_RUNTIME_GRAPH_EXECUTOR_H_ diff --git a/nnvm/src/top/README.md b/nnvm/src/top/README.md deleted file mode 100644 index 4da78195e267..000000000000 --- a/nnvm/src/top/README.md +++ /dev/null @@ -1,2 +0,0 @@ -Core Operator List -==================