Skip to content

Commit

Permalink
Fix TVMArray layout on device (apache#5599)
Browse files Browse the repository at this point in the history
  • Loading branch information
areusch authored and trevor-m committed Jun 18, 2020
1 parent 61cb121 commit 8d00c54
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 83 deletions.
31 changes: 16 additions & 15 deletions src/runtime/micro/micro_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,8 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc
const int* type_codes = args.type_codes;
int num_args = args.num_args;

auto tvm_vals_slot = encoder->Alloc<TVMValue>(num_args);
auto type_codes_slot = encoder->Alloc<const int>(num_args);
auto tvm_vals_alloc = encoder->Alloc<TVMValue>(num_args);
auto type_codes_alloc = encoder->Alloc<const int>(num_args);

for (int i = 0; i < num_args; i++) {
switch (type_codes[i]) {
Expand All @@ -425,7 +425,7 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc

TVMValue val;
val.v_handle = arr_ptr;
tvm_vals_slot.WriteValue(val);
tvm_vals_alloc->WriteValue(val);
break;
}
// TODO(weberlo): Implement `double` and `int64` case.
Expand All @@ -437,25 +437,24 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc
break;
}
}
type_codes_slot.WriteArray(type_codes, num_args);
return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr());
type_codes_alloc->WriteArray(type_codes, num_args);
encoder->CheckUnfilledAllocs();
return std::make_tuple(tvm_vals_alloc->start_addr(), type_codes_alloc->start_addr());
}

template <typename T>
TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DLTensor& arr) {
auto tvm_arr_slot = encoder->Alloc<T>();
auto shape_slot = encoder->Alloc<int64_t>(arr.ndim);

// `shape` and `strides` are stored on the host, so we need to write them to
// the device first. The `data` field is already allocated on the device and
// is a device pointer, so we don't need to write it.
shape_slot.WriteArray(arr.shape, arr.ndim);
TargetPtr shape_dev_addr = shape_slot.start_addr();
auto shape_alloc = encoder->Alloc<int64_t>(arr.ndim);
shape_alloc->WriteArray(arr.shape, arr.ndim);
TargetPtr shape_dev_addr = shape_alloc->start_addr();
TargetPtr strides_dev_addr = TargetPtr(word_size_, nullptr);
if (arr.strides != nullptr) {
auto stride_slot = encoder->Alloc<int64_t>(arr.ndim);
stride_slot.WriteArray(arr.strides, arr.ndim);
strides_dev_addr = stride_slot.start_addr();
auto stride_alloc = encoder->Alloc<int64_t>(arr.ndim);
stride_alloc->WriteArray(arr.strides, arr.ndim);
strides_dev_addr = stride_alloc->start_addr();
}

T dev_arr(TargetVal{word_size_.bits(), reinterpret_cast<uint64_t>(arr.data)}, arr.ctx, arr.ndim,
Expand All @@ -466,8 +465,10 @@ TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DL
// Update the device type to CPU, because from the microcontroller's
// perspective, it is.
dev_arr.ctx.device_type = DLDeviceType::kDLCPU;
tvm_arr_slot.WriteValue(dev_arr);
return tvm_arr_slot.start_addr();

auto tvm_arr_alloc = encoder->Alloc<T>();
tvm_arr_alloc->WriteValue(dev_arr);
return tvm_arr_alloc->start_addr();
}

// TODO(weberlo): switch over entirely to error codes that expand to error
Expand Down
26 changes: 7 additions & 19 deletions src/runtime/micro/micro_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,16 +315,13 @@ struct MicroDevSpace {
struct TVMArray32 {
TVMArray32(TargetVal data, DLContext ctx, int32_t ndim, DLDataType dtype, TargetVal shape,
TargetVal strides, TargetVal byte_offset)
: data(data.uint32()),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.uint32()),
strides(strides.uint32()),
pad1(0),
byte_offset(byte_offset.uint32()),
pad2(0) {}
: data{data.uint32()},
ctx{ctx},
ndim{ndim},
dtype{dtype},
shape{shape.uint32()},
strides{strides.uint32()},
byte_offset{byte_offset.uint32()} {}

/*!
* \brief The opaque data pointer points to the allocated data.
Expand All @@ -336,8 +333,6 @@ struct TVMArray32 {
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
Expand All @@ -347,12 +342,8 @@ struct TVMArray32 {
* can be NULL, indicating tensor is compact.
*/
uint32_t strides;
/*! \brief Padding to enforce struct alignment */
uint32_t pad1;
/*! \brief The offset in bytes to the beginning pointer to data */
uint32_t byte_offset;
/*! \brief Padding to enforce struct alignment */
uint32_t pad2;
};

/*! \brief TVM array for serialization to 64-bit devices */
Expand All @@ -362,7 +353,6 @@ struct TVMArray64 {
: data(data.uint64()),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.uint64()),
strides(strides.uint64()),
Expand All @@ -377,8 +367,6 @@ struct TVMArray64 {
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
Expand Down
73 changes: 73 additions & 0 deletions src/runtime/micro/target_data_layout_encoder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "target_data_layout_encoder.h"

namespace tvm {
namespace runtime {

TargetDataLayoutEncoder::Alloc::Alloc(TargetDataLayoutEncoder* parent, size_t start_offset,
size_t size, TargetPtr start_addr)
: parent_(parent),
start_offset_(start_offset),
curr_offset_(0),
size_(size),
start_addr_(start_addr) {
parent_->live_unchecked_allocs_.insert(this);
}

/*!
 * \brief unregisters this alloc from its parent encoder
 *
 * If the alloc is still in the parent's set of live, unchecked allocs and has
 * not been completely written, its device start address is recorded on the
 * parent so that the problem can be reported later.
 */
TargetDataLayoutEncoder::Alloc::~Alloc() {
  // erase(key) returns the number of removed entries: 0 means this alloc was
  // no longer tracked as unchecked, so there is nothing left to record.
  const bool was_unchecked = parent_->live_unchecked_allocs_.erase(this) != 0;
  if (!was_unchecked) {
    return;
  }

  // Fully written allocs need no further bookkeeping.
  if (curr_offset_ == size_) {
    return;
  }

  parent_->unchecked_alloc_start_offsets_.push_back(start_addr_.value().uint64());
}

/*!
 * \brief checks that this alloc has been completely written
 *
 * Fails the CHECK when the current write offset differs from the alloc's size;
 * the failure message carries the alloc's device start address, the current
 * offset and the size.
 */
void TargetDataLayoutEncoder::Alloc::CheckUnfilled() {
// std::hex stays in effect for the rest of the stream, so the address, the
// current offset and the size are all printed in hexadecimal, matching the
// "0x" prefixes in the message text.
CHECK(curr_offset_ == size_) << "unwritten space in alloc 0x" << std::hex
<< start_addr_.value().uint64() << "; curr_offset=0x" << curr_offset_
<< ", size=0x" << size_;
}

/*!
 * \brief returns start address of the alloc in device memory
 * \return device start address
 */
TargetPtr TargetDataLayoutEncoder::Alloc::start_addr() {
  return start_addr_;
}

/*!
 * \brief returns number of bytes allocated for this alloc
 * \return size of this alloc
 */
size_t TargetDataLayoutEncoder::Alloc::size() {
  return size_;
}

/*!
 * \brief verifies that every alloc handed out so far has been completely written
 *
 * Fails if there are no live allocs to check, if any alloc was destroyed while
 * still unfilled, or if any live alloc has unwritten space. On success the set
 * of live, unchecked allocs is emptied.
 */
void TargetDataLayoutEncoder::CheckUnfilledAllocs() {
  CHECK(live_unchecked_allocs_.size() > 0) << "No allocs to check";

  // Allocs that were destroyed while unfilled can no longer be inspected, so
  // list the recorded start addresses and then fail.
  if (!unchecked_alloc_start_offsets_.empty()) {
    LOG(ERROR) << "Unchecked allocs were found:";
    for (size_t i = 0; i < unchecked_alloc_start_offsets_.size(); ++i) {
      const size_t destroyed_start_addr = unchecked_alloc_start_offsets_[i];
      LOG(ERROR) << " * 0x" << std::hex << destroyed_start_addr;
    }
    CHECK(false) << "Unchecked allocs found during CheckUnfilledAllocs";
  }

  // Every alloc that is still alive must have been filled completely.
  for (auto* live_alloc : live_unchecked_allocs_) {
    live_alloc->CheckUnfilled();
  }

  // All live allocs passed; they no longer count as unchecked.
  live_unchecked_allocs_.clear();
}

} // namespace runtime
} // namespace tvm
88 changes: 39 additions & 49 deletions src/runtime/micro/target_data_layout_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
#ifndef TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_
#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_

#include <memory>
#include <set>
#include <vector>

#include "host_driven/utvm_runtime_enum.h"
#include "micro_common.h"

namespace tvm {
namespace runtime {
Expand All @@ -41,55 +44,60 @@ class TargetDataLayoutEncoder {
/*!
* \brief helper class for writing into `TargetDataLayoutEncoder`
*/
template <typename T>
class Slot {
class Alloc {
public:
/*!
* \brief constructor
* \param parent pointer to parent encoder
* \param start_offset start byte offset of the slot in the backing buffer
* \param size size (in bytes) of the memory region allocated for this slot
* \param start_addr start address of the slot in the device's memory
* \param start_offset start byte offset of the alloc in the backing buffer
* \param size size (in bytes) of the memory region allocated for this alloc
* \param start_addr start address of the alloc in the device's memory
*/
Slot(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr);
Alloc(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr);

~Slot();
~Alloc();

/*!
* \brief writes `sizeof(T) * num_elems` bytes of data from `arr`
* \param arr array to be read from
* \param num_elems number of elements in array
*/
template <typename T>
void WriteArray(const T* arr, size_t num_elems);

/*!
* \brief writes `val`
* \param val value to be written
*/
template <typename T>
void WriteValue(const T& val);

/*!
* \brief returns start address of the slot in device memory
* \brief returns start address of the alloc in device memory
* \return device start address
*/
TargetPtr start_addr();

/*!
* \brief returns number of bytes allocated for this slot
* \return size of this slot
* \brief returns number of bytes allocated for this alloc
* \return size of this alloc
*/
size_t size();

size_t curr_offset() const { return curr_offset_; }

void CheckUnfilled();

private:
/*! \brief pointer to parent encoder */
TargetDataLayoutEncoder* parent_;
/*! \brief start offset of the slot in the parent's backing parent_buffer */
/*! \brief start offset of the alloc in the parent's backing parent_buffer */
size_t start_offset_;
/*! \brief current offset relative to the start offset of this slot */
/*! \brief current offset relative to the start offset of this alloc */
size_t curr_offset_;
/*! \brief size (in bytes) of the memory region allocated for this slot */
/*! \brief size (in bytes) of the memory region allocated for this alloc */
size_t size_;
/*! \brief start address of the slot in the device's memory */
/*! \brief start address of the alloc in the device's memory */
TargetPtr start_addr_;
};

Expand All @@ -105,21 +113,23 @@ class TargetDataLayoutEncoder {
word_size_(word_size) {}

/*!
* \brief allocates a slot for `sizeof(T) * num_elems` bytes of data
* \brief allocates an alloc for `sizeof(T) * num_elems` bytes of data
* \param num_elems number of elements of type `T` being allocated (defaults to 1)
* \return slot of size `sizeof(T) * num_elems` bytes
* \return alloc of size `sizeof(T) * num_elems` bytes
*/
template <typename T>
Slot<T> Alloc(size_t num_elems = 1) {
std::unique_ptr<class Alloc> Alloc(size_t num_elems = 1) {
curr_offset_ = UpperAlignValue(curr_offset_, word_size_.bytes());
size_t size = sizeof(T) * num_elems;
if (curr_offset_ + size > buf_.size()) {
buf_.resize(curr_offset_ + size);
}
CHECK(buf_.size() < capacity_) << "out of space in data encoder";
size_t slot_start_offset = curr_offset_;
size_t alloc_start_offset = curr_offset_;
curr_offset_ += size;
return Slot<T>(this, slot_start_offset, size, start_addr() + slot_start_offset);
class Alloc* alloc =
new class Alloc(this, alloc_start_offset, size, start_addr() + alloc_start_offset);
return std::unique_ptr<class Alloc>(alloc);
}

void Clear() {
Expand Down Expand Up @@ -150,6 +160,8 @@ class TargetDataLayoutEncoder {
TargetPtr(word_size_, UpperAlignValue(start_addr.value().uint64(), word_size_.bytes()));
}

void CheckUnfilledAllocs();

private:
/*! \brief in-memory backing buffer */
std::vector<uint8_t> buf_;
Expand All @@ -161,50 +173,28 @@ class TargetDataLayoutEncoder {
size_t capacity_;
/*! \brief number of bytes in a word on the target device */
TargetWordSize word_size_;
/*! \brief Alloc instances allocated now but not yet checked by CheckUnfilledAllocs */
std::set<class Alloc*> live_unchecked_allocs_;
/*! \brief device start addresses of Alloc instances that were deallocated, still unfilled, before CheckUnfilledAllocs ran */
std::vector<size_t> unchecked_alloc_start_offsets_;
friend Alloc::~Alloc();
};

template <typename T>
TargetDataLayoutEncoder::Slot<T>::Slot(TargetDataLayoutEncoder* parent, size_t start_offset,
size_t size, TargetPtr start_addr)
: parent_(parent),
start_offset_(start_offset),
curr_offset_(0),
size_(size),
start_addr_(start_addr) {}

template <typename T>
TargetDataLayoutEncoder::Slot<T>::~Slot() {
// TODO(weberlo, areusch): this can mask the exception thrown by slot allocation... even though
// that doesn't make sense.
CHECK(curr_offset_ == size_) << "unwritten space in slot; curr_offset=" << curr_offset_
<< ", size=" << size_;
}

template <typename T>
void TargetDataLayoutEncoder::Slot<T>::WriteArray(const T* arr, size_t num_elems) {
void TargetDataLayoutEncoder::Alloc::WriteArray(const T* arr, size_t num_elems) {
if (num_elems == 0) return;
size_t size = sizeof(T) * num_elems;
CHECK(curr_offset_ + size <= size_) << "not enough space in slot";
CHECK(curr_offset_ + size <= size_) << "not enough space in alloc";
uint8_t* curr_ptr = &(parent_->data())[start_offset_ + curr_offset_];
std::memcpy(curr_ptr, arr, size);
curr_offset_ += size;
}

template <typename T>
void TargetDataLayoutEncoder::Slot<T>::WriteValue(const T& val) {
void TargetDataLayoutEncoder::Alloc::WriteValue(const T& val) {
WriteArray(&val, 1);
}

template <typename T>
TargetPtr TargetDataLayoutEncoder::Slot<T>::start_addr() {
return start_addr_;
}

template <typename T>
size_t TargetDataLayoutEncoder::Slot<T>::size() {
return size_;
}

} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_

0 comments on commit 8d00c54

Please sign in to comment.