Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Hexagon VTCM and discontiguous allocation support #9525

Merged
merged 8 commits into from
Dec 11, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,8 @@ endif()
if(GTEST_FOUND)
file(GLOB_RECURSE TEST_SRCS tests/cpp/*.cc)
add_executable(cpptest ${TEST_SRCS})
# include runtime files for unit testing
target_include_directories(cpptest PUBLIC "src/runtime")
target_link_libraries(cpptest PRIVATE ${TVM_TEST_LIBRARY_NAME} GTest::GTest GTest::Main pthread dl)
set_target_properties(cpptest PROPERTIES EXCLUDE_FROM_ALL 1)
set_target_properties(cpptest PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD 1)
Expand Down
2 changes: 2 additions & 0 deletions cmake/modules/Hexagon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ endif()
# Don't run these checks when compiling Hexagon device code,
# e.g. when compiling the TVM runtime for Hexagon.
if (NOT BUILD_FOR_HEXAGON)
# append select runtime sources for unit testing
list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon/hexagon_buffer.cc)
if(USE_HEXAGON_LAUNCHER STREQUAL "ON")
set(USE_HEXAGON_DEVICE "${PICK_SIM}")
else()
Expand Down
233 changes: 185 additions & 48 deletions src/runtime/hexagon/hexagon/hexagon_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,76 +21,154 @@

#include <tvm/runtime/module.h>

#include "hexagon_common.h"

#ifdef BUILD_FOR_HEXAGON
#include "HAP_compute_res.h"
#endif

#include <string>
#include <utility>

#include "hexagon_common.h"

namespace tvm {
namespace runtime {
namespace hexagon {

// Returns the alignment to use for buffers of `dtype`: the byte width of one
// element (bits / 8, times lanes), raised to kAllocAlignment when it is smaller.
static size_t GetDataAlignment(const DLDataType dtype) {
  const size_t element_bytes = (dtype.bits / 8) * dtype.lanes;
  return (element_bytes < kAllocAlignment) ? kAllocAlignment : element_bytes;
}
struct Allocation {
Allocation(size_t nbytes, size_t alignment) : nbytes_(nbytes), alignment_(alignment) {}
virtual ~Allocation() {}
Allocation(const Allocation&) = delete;
Allocation& operator=(const Allocation&) = delete;
Allocation(Allocation&&) = delete;
Allocation& operator=(Allocation&&) = delete;

HexagonBuffer::HexagonBuffer(int ndim, const int64_t* shape, DLDataType dtype,
Optional<String> scope) {
ICHECK_LE(ndim, 1) << "Hexagon currently only supports flat allocations "
<< "and arrays of flat allocations.";
void* data_{nullptr};
size_t nbytes_;
size_t alignment_;
};

size_t alignment = GetDataAlignment(dtype);
// TODO(csullivan): Extend to support arrays of allocations.
// Move assignment from r-value constructed flat allocation.
*this = HexagonBuffer(shape[0] * (dtype.bits / 8) * dtype.lanes, alignment, scope);
}
// Allocation backed by aligned heap memory: posix_memalign/free on non-Windows
// builds, _aligned_malloc/_aligned_free when _WIN32 is defined. Construction
// CHECK-fails if the underlying allocator reports failure.
struct DDRAllocation : public Allocation {
  DDRAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
#ifndef _WIN32
    // posix_memalign reports failure through its return code, not through data_.
    const int ret = posix_memalign(&data_, alignment, nbytes);
    CHECK_EQ(ret, 0);
#else
    data_ = _aligned_malloc(nbytes, alignment);
    CHECK(data_ != nullptr);
#endif
  }
  ~DDRAllocation() {
    // Each allocator has its own matching release function.
#ifndef _WIN32
    free(data_);
#else
    _aligned_free(data_);
#endif
  }
};

HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) {
void* ptr = nullptr;
int ret = posix_memalign(&ptr, alignment, nbytes);
adstraw marked this conversation as resolved.
Show resolved Hide resolved
if (ret != 0) {
throw std::bad_alloc();
#ifdef BUILD_FOR_HEXAGON
struct VTCMAllocation : public Allocation {
VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
// TODO(Straw): Alignment not used when allocating VTCM
compute_res_attr_t res_info;
HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));
// TODO(Straw): Magic number 1
HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, nbytes, 1));
// TODO(Straw): HEXAGON_SAFE_CALL?
// TODO(Straw): Magic number 10000
context_id_ = HAP_compute_res_acquire(&res_info, 10000);
if (context_id_) {
// TODO(Straw): HEXAGON_SAFE_CALL?
data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
if (!data_) {
HEXAGON_PRINT(ERROR, "ERROR: Allocated VTCM ptr is null.");
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
return;
}
} else {
HEXAGON_PRINT(ERROR, "ERROR: Unable to acquire requeisted resource.");
return;
}
// HEXAGON_PRINT(ALWAYS, "VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_, data_);
}
allocations_.push_back(ptr);
SetStorageScope(scope);
// Releases the compute resource, but only when both a context id and a VTCM
// pointer are held; otherwise there is nothing to do.
~VTCMAllocation() {
  // HEXAGON_PRINT(ALWAYS, "~VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_,
  // data_);
  // TODO(Straw): Need to handle the else case(s) here
  const bool holds_resource = context_id_ && data_;
  if (!holds_resource) {
    return;
  }
  HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
  data_ = nullptr;
}
unsigned int context_id_{0};
};
#else
struct VTCMAllocation : public DDRAllocation {
VTCMAllocation(size_t nbytes, size_t alignment) : DDRAllocation(nbytes, alignment) {}
};
#endif

// Factory for a single allocation of `nbytes` bytes with the requested
// `alignment`, selected at compile time by storage scope. Only the explicit
// specializations below are defined.
template <HexagonBuffer::StorageScope S>
std::unique_ptr<Allocation> Allocator(size_t nbytes, size_t alignment);

// kDDR: returns a DDRAllocation.
template <>
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kDDR>(size_t nbytes,
size_t alignment) {
return std::make_unique<DDRAllocation>(nbytes, alignment);
}

// kVTCM: returns a VTCMAllocation. That type uses the HAP_compute_res_* API when
// BUILD_FOR_HEXAGON is defined and derives from DDRAllocation otherwise.
template <>
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kVTCM>(size_t nbytes,
size_t alignment) {
return std::make_unique<VTCMAllocation>(nbytes, alignment);
}

HexagonBuffer::HexagonBuffer(void* data, Optional<String> scope) : managed_{false} {
HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope)
: ndim_(1), nbytes_(nbytes) {
SetStorageScope(scope);
allocations_.push_back(data);

std::unique_ptr<Allocation> alloca = nullptr;
if (GetStorageScope() == StorageScope::kDDR) {
alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
} else if (GetStorageScope() == StorageScope::kVTCM) {
alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
}
CHECK(alloca != nullptr);
allocations_.push_back(alloca->data_);
managed_allocations_.push_back(std::move(alloca));
}

HexagonBuffer::~HexagonBuffer() {
if (managed_) {
for (auto& ptr : allocations_) {
free(ptr);
// Creates a buffer made of `ndim` separate allocations of `nbytes` bytes each,
// all in the storage scope named by `scope`. The total size recorded in nbytes_
// is ndim * nbytes. CHECK-fails when the scope is neither kDDR nor kVTCM.
HexagonBuffer::HexagonBuffer(size_t ndim, size_t nbytes, size_t alignment, Optional<String> scope)
    : ndim_(ndim), nbytes_(ndim * nbytes) {
  SetStorageScope(scope);
  for (size_t region = 0; region != ndim; ++region) {
    const StorageScope where = GetStorageScope();
    std::unique_ptr<Allocation> alloca;
    if (where == StorageScope::kVTCM) {
      alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
    } else if (where == StorageScope::kDDR) {
      alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
    }
    CHECK(alloca != nullptr);
    // Record the raw pointer first, then hand ownership to managed_allocations_.
    allocations_.push_back(alloca->data_);
    managed_allocations_.push_back(std::move(alloca));
  }
}

HexagonBuffer::HexagonBuffer(HexagonBuffer&& other)
: allocations_(other.allocations_),
managed_(other.managed_),
storage_scope_(other.storage_scope_) {
other.allocations_.clear();
other.managed_ = false;
other.storage_scope_ = StorageScope::kDDR;
// Wraps externally provided memory `data` of `nbytes` bytes as a
// single-allocation buffer (ndim_ == 1). CHECK-fails when `scope` resolves to
// kVTCM.
HexagonBuffer::HexagonBuffer(void* data, size_t nbytes, Optional<String> scope)
: ndim_(1), nbytes_(nbytes) {
SetStorageScope(scope);
// disallow external VTCM allocations
CHECK(GetStorageScope() != HexagonBuffer::StorageScope::kVTCM);
// Only the raw pointer is recorded; nothing is added to managed_allocations_,
// so the destructor (which only clears managed_allocations_) does not free it.
allocations_.push_back(data);
}

// Move assignment implemented as a member-wise exchange: after the call `other`
// holds this buffer's previous allocations, managed flag and storage scope.
HexagonBuffer& HexagonBuffer::operator=(HexagonBuffer&& other) {
  allocations_.swap(other.allocations_);
  std::swap(managed_, other.managed_);
  std::swap(storage_scope_, other.storage_scope_);
  return *this;
}
// Drops every entry of managed_allocations_, destroying the Allocation objects
// this buffer owns.
HexagonBuffer::~HexagonBuffer() {
  managed_allocations_.clear();
}

void* HexagonBuffer::GetPointer() {
void** HexagonBuffer::GetPointer() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning a void ** here disambiguates between the cases where there is a single allocation vs. multiple. We always return a pointer to a pointer. This may be controversial.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we decide to revert this change so that tests pass and revisit once codegen supports pointer array indexing?

Copy link
Contributor Author

@adstraw adstraw Dec 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to keep HexagonBuffer as-is, returning a void**, which means that HexagonBuffer supports discontiguous allocation. I also marked the portions of the device API that force or assume contiguous allocation with "TODO" notes for myself, pending RFC 39.

if (!allocations_.size()) {
return nullptr;
}
return (allocations_.size() > 1) ? allocations_.data() : allocations_[0];
return allocations_.data();
}

// Returns the storage scope currently recorded for this buffer.
HexagonBuffer::StorageScope HexagonBuffer::GetStorageScope() const {
  return storage_scope_;
}
Expand All @@ -110,11 +188,70 @@ void HexagonBuffer::SetStorageScope(Optional<String> scope) {
}
}

HexagonBuffer* IsHexagonBuffer(DLTensor* tensor) {
if (TVMDeviceExtType(tensor->device.device_type) == kDLHexagon) {
return static_cast<HexagonBuffer*>(tensor->data);
void HexagonBuffer::CopyTo(void* data, size_t nbytes) {
CHECK(nbytes_ == nbytes);
size_t offset = 0;
for (size_t i = 0; i < ndim_; ++i) {
CHECK(nbytes / ndim_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(data) + offset,
static_cast<const char*>(managed_allocations_[i]->data_),
managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
}

void HexagonBuffer::CopyFrom(void* data, size_t nbytes) {
CHECK(nbytes_ == nbytes);
size_t offset = 0;
for (size_t i = 0; i < ndim_; ++i) {
CHECK(nbytes / ndim_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(data) + offset, managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
}

void HexagonBuffer::CopyFrom(const HexagonBuffer& other) {
CHECK(nbytes_ == other.nbytes_);

if (ndim_ == other.ndim_) {
for (size_t i = 0; i < ndim_; ++i) {
CHECK(managed_allocations_[i]->nbytes_ == other.managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(other.managed_allocations_[i]->data_),
managed_allocations_[i]->nbytes_);
}
} else if (ndim_ == 1) {
size_t offset = 0;
for (size_t i = 0; i < other.ndim_; ++i) {
CHECK(nbytes_ / other.ndim_ == other.managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[0]->data_) + offset,
static_cast<const char*>(other.managed_allocations_[i]->data_),
other.managed_allocations_[i]->nbytes_);

offset += other.managed_allocations_[i]->nbytes_;
}
} else if (other.ndim_ == 1) {
size_t offset = 0;
for (size_t i = 0; i < ndim_; ++i) {
CHECK(other.nbytes_ / ndim_ == managed_allocations_[i]->nbytes_);

memcpy(static_cast<char*>(managed_allocations_[i]->data_),
static_cast<const char*>(other.managed_allocations_[0]->data_) + offset,
managed_allocations_[i]->nbytes_);

offset += managed_allocations_[i]->nbytes_;
}
} else {
CHECK(false) << "To copy between Hexagon Buffers they must either have the same number of "
"dimensions or one of the Hexagon Buffers must have a single dimension.";
}
return nullptr;
}

} // namespace hexagon
Expand Down
Loading