diff --git a/cmake/config.cmake b/cmake/config.cmake index 89f5a0597e43..85c5102169a9 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -42,6 +42,9 @@ set(USE_ROCM OFF) # Whether enable SDAccel runtime set(USE_SDACCEL OFF) +# Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime +set(USE_AOCL OFF) + # Whether enable OpenCL runtime set(USE_OPENCL OFF) diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake index 8ae4b29aff48..b30df1864522 100644 --- a/cmake/modules/OpenCL.cmake +++ b/cmake/modules/OpenCL.cmake @@ -19,6 +19,18 @@ else() list(APPEND COMPILER_SRCS src/codegen/opt/build_sdaccel_off.cc) endif(USE_SDACCEL) +if(USE_AOCL) + message(STATUS "Build with Intel FPGA SDK for OpenCL support") + file(GLOB RUNTIME_AOCL_SRCS src/runtime/opencl/aocl/*.cc) + list(APPEND RUNTIME_SRCS ${RUNTIME_AOCL_SRCS}) + if(NOT USE_OPENCL) + message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL") + set(USE_OPENCL ON) + endif() +else() + list(APPEND COMPILER_SRCS src/codegen/opt/build_aocl_off.cc) +endif(USE_AOCL) + if(USE_OPENCL) find_package(OpenCL REQUIRED) message(STATUS "Build with OpenCL support") diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md new file mode 100644 index 000000000000..bd0dae97879d --- /dev/null +++ b/docs/deploy/aocl_fpga.md @@ -0,0 +1,92 @@ +AOCL Backend Example +==================== + +TVM supports the Intel FPGA SDK for OpenCL, also known as AOCL. Here is a tutorial on how to use TVM with AOCL. + +***Note***: This feature is still experimental. We cannot yet use AOCL to deploy an end-to-end neural network. In addition, we have only tested compilation in AOCL's emulation mode. + +We use two Python scripts for this tutorial. + +- build.py - a script to synthesize the FPGA bitstream. 
+``` +import tvm + +tgt_host="llvm" +tgt="aocl -device=s5_ref -mattr=emulator" + +n = tvm.var("n") +A = tvm.placeholder((n,), name='A') +B = tvm.placeholder((n,), name='B') +C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C") + +s = tvm.create_schedule(C.op) +px, x = s[C].split(C.op.axis[0], nparts=1) + +s[C].bind(px, tvm.thread_axis("pipeline")) + +fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd") + +fadd.save("myadd.o") +fadd.imported_modules[0].save("myadd.aocx") + +tvm.contrib.cc.create_shared("myadd.so", ["myadd.o"]) +``` + +- run.py - a script that uses the FPGA as an accelerator. +``` +import tvm +import numpy as np +import os + +tgt="aocl -device=s5_ref -mattr=emulator" + +fadd = tvm.module.load("myadd.so") +fadd_dev = tvm.module.load("myadd.aocx") +fadd.import_module(fadd_dev) + +ctx = tvm.context(tgt, 0) + +n = 1024 +a = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx) +b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx) +c = tvm.nd.array(np.zeros(n, dtype="float32"), ctx) + +fadd(a, b, c) +np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy()) +``` + +Setup +----- + +- Install AOCL 17.1 on Ubuntu 16.04.4 LTS. +- Install the BSP for your FPGA device. +- Install the FPGA device driver. +- Create an ICD file at /etc/OpenCL/vendors/Altera.icd so that the OpenCL platform can be found. +``` +/opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so +``` +- Create an FCD file, for example at /opt/Intel/OpenCL/Boards/s5_ref.fcd, so that your FPGA device can be found. +``` +/opt/intelFPGA/17.1/hld/board/s5_ref/linux64/lib/libaltera_s5_ref_mmd.so +``` +- Set up TVM with AOCL and OpenCL enabled. 
+ +Emulation +--------- + +- Run software emulation +``` +export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 + +python build.py +python run.py +``` + +- Run on FPGA devices (not tested) + - Change the tgt value to "aocl -device=s5_ref" in build.py and run.py +``` +unset CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA + +python build.py +python run.py +``` diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 6fb7a0f3f8b3..17d00bf479aa 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -60,6 +60,7 @@ typedef int64_t tvm_index_t; /*! \brief Extension device types in TVM */ typedef enum { + kDLAOCL = 5, kDLSDAccel = 6, kDLVulkan = 7, kOpenGL = 11, diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 612b54649d74..4c36e82a81ec 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -96,6 +96,7 @@ class TVMContext(ctypes.Structure): 1 : 'cpu', 2 : 'gpu', 4 : 'opencl', + 5 : 'aocl', 6 : 'sdaccel', 7 : 'vulkan', 8 : 'metal', @@ -113,6 +114,7 @@ class TVMContext(ctypes.Structure): 'nvptx': 2, 'cl': 4, 'opencl': 4, + 'aocl' : 5, 'sdaccel': 6, 'vulkan': 7, 'metal': 8, diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index f9b6226f86c4..49b86626add0 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -91,6 +91,9 @@ Target CreateTarget(const std::string& target_name, } else if (target_name == "sdaccel") { t->device_type = kDLOpenCL; t->keys_array.push_back(ir::StringImm::make("sdaccel")); + } else if (target_name == "aocl") { + t->device_type = kDLAOCL; + t->keys_array.push_back(ir::StringImm::make("aocl")); } else if (target_name == "opengl") { t->device_type = kOpenGL; t->keys_array.push_back(ir::StringImm::make("opengl")); diff --git a/src/codegen/codegen_aocl.cc b/src/codegen/codegen_aocl.cc new file mode 100644 index 000000000000..8830588758ef --- /dev/null +++ b/src/codegen/codegen_aocl.cc @@ 
-0,0 +1,62 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file codegen_aocl.cc + */ +#include +#include +#include +#include "./codegen_opencl.h" +#include "./build_common.h" +#include "../runtime/opencl/aocl/aocl_module.h" +#include "../runtime/file_util.h" + +namespace tvm { +namespace codegen { + +runtime::Module BuildAOCL(Array funcs, std::string target_str) { + // Get code. + using tvm::runtime::Registry; + bool output_ssa = false; + CodeGenOpenCL cg; + cg.Init(output_ssa); + for (LoweredFunc f : funcs) { + cg.AddFunction(f); + } + std::string code = cg.Finish(); + if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) { + code = (*f)(code).operator std::string(); + } + + // Write a .cl file. + runtime::SaveBinaryToFile("aocl.cl", code.c_str()); + + // Compile the .cl file. + Target target = Target::create(target_str); + if (target->device_name == "") { + LOG(FATAL) << "AOCL device name not specified in build target."; + } + std::string cmd = "aoc aocl.cl"; + for (std::string option : target->options()) { + if (option == "-mattr=emulator") { + cmd += " -march=emulator"; + } + } + cmd += " -board=" + target->device_name; + if (system(cmd.c_str()) != 0) { + LOG(FATAL) << "OpenCL offline compilation error."; + } + + // Read .aocx file + std::string aocxbin; + runtime::LoadBinaryFromFile("aocl.aocx", &aocxbin); + + return AOCLModuleCreate(aocxbin, "aocx", ExtractFuncInfo(funcs), code); +} + +TVM_REGISTER_API("codegen.build_aocl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = BuildAOCL(args[0], args[1]); + }); + +} // namespace codegen +} // namespace tvm diff --git a/src/codegen/opt/build_aocl_off.cc b/src/codegen/opt/build_aocl_off.cc new file mode 100644 index 000000000000..535036016247 --- /dev/null +++ b/src/codegen/opt/build_aocl_off.cc @@ -0,0 +1,21 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * Optional module when build aocl is switched to off + */ +#include "../codegen_source_base.h" +#include "../../runtime/opencl/opencl_module.h" + +namespace tvm { +namespace runtime { + +Module AOCLModuleCreate( + std::string data, + std::string fmt, + std::unordered_map fmap, + std::string source) { + LOG(WARNING) << "AOCL runtime not enabled, return a source module..."; + return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "aocl"); +} + +} // namespace runtime +} // namespace tvm diff --git a/src/pass/verify_memory.cc b/src/pass/verify_memory.cc index 38cd05a4bdd0..bafaaa642ac7 100644 --- a/src/pass/verify_memory.cc +++ b/src/pass/verify_memory.cc @@ -145,7 +145,7 @@ class MemoryAccessVerifier final : protected IRVisitor { } /// Check if a given DLDeviceType/TVMDeviceExtType value denotes FPGA device. static bool IsFPGADevice(int dev_type) { - return kDLSDAccel == dev_type; + return kDLSDAccel == dev_type || kDLAOCL == dev_type; } private: diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index 7a7d7ab9f4db..916dfadecb4c 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -32,6 +32,7 @@ inline std::string DeviceName(int type) { case kDLGPU: return "gpu"; case kDLOpenCL: return "opencl"; case kDLSDAccel: return "sdaccel"; + case kDLAOCL: return "aocl"; case kDLVulkan: return "vulkan"; case kDLMetal: return "metal"; case kDLVPI: return "vpi"; diff --git a/src/runtime/opencl/aocl/aocl_common.h b/src/runtime/opencl/aocl/aocl_common.h new file mode 100644 index 000000000000..234053098d1d --- /dev/null +++ b/src/runtime/opencl/aocl/aocl_common.h @@ -0,0 +1,42 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file aocl_common.h + * \brief AOCL common header + */ +#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_ +#define TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_ + +#include "../opencl_common.h" + +namespace tvm { +namespace runtime { +namespace cl { + +/*! 
+ * \brief Process global AOCL workspace. + */ +class AOCLWorkspace final : public OpenCLWorkspace { + public: + // override OpenCL device API + void Init() final; + bool IsOpenCLDevice(TVMContext ctx) final; + OpenCLThreadEntry* GetThreadEntry() final; + // get the global workspace + static const std::shared_ptr& Global(); +}; + + +/*! \brief Thread local workspace for AOCL */ +class AOCLThreadEntry : public OpenCLThreadEntry { + public: + // constructor + AOCLThreadEntry() + : OpenCLThreadEntry(static_cast(kDLAOCL), AOCLWorkspace::Global()) {} + + // get the global workspace + static AOCLThreadEntry* ThreadLocal(); +}; +} // namespace cl +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_ diff --git a/src/runtime/opencl/aocl/aocl_device_api.cc b/src/runtime/opencl/aocl/aocl_device_api.cc new file mode 100644 index 000000000000..e9cbc6b4cda0 --- /dev/null +++ b/src/runtime/opencl/aocl/aocl_device_api.cc @@ -0,0 +1,44 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file aocl_device_api.cc + */ +#include +#include +#include "./aocl_common.h" + +namespace tvm { +namespace runtime { +namespace cl { + +OpenCLThreadEntry* AOCLWorkspace::GetThreadEntry() { + return AOCLThreadEntry::ThreadLocal(); +} + +const std::shared_ptr& AOCLWorkspace::Global() { + static std::shared_ptr inst = std::make_shared(); + return inst; +} + +void AOCLWorkspace::Init() { + OpenCLWorkspace::Init("aocl", "accelerator", "Intel(R) FPGA SDK for OpenCL(TM)"); +} + +bool AOCLWorkspace::IsOpenCLDevice(TVMContext ctx) { + return ctx.device_type == static_cast(kDLAOCL); +} + +typedef dmlc::ThreadLocalStore AOCLThreadStore; + +AOCLThreadEntry* AOCLThreadEntry::ThreadLocal() { + return AOCLThreadStore::Get(); +} + +TVM_REGISTER_GLOBAL("device_api.aocl") +.set_body([](TVMArgs args, TVMRetValue* rv) { + DeviceAPI* ptr = AOCLWorkspace::Global().get(); + *rv = static_cast(ptr); + }); + +} // namespace cl +} // namespace runtime +} // namespace tvm diff --git 
a/src/runtime/opencl/aocl/aocl_module.cc b/src/runtime/opencl/aocl/aocl_module.cc new file mode 100644 index 000000000000..a056c5cee671 --- /dev/null +++ b/src/runtime/opencl/aocl/aocl_module.cc @@ -0,0 +1,58 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file aocl_module.cc + */ +#include +#include +#include +#include +#include +#include "./aocl_common.h" +#include "./aocl_module.h" + +namespace tvm { +namespace runtime { + +class AOCLModuleNode : public OpenCLModuleNode { + public: + explicit AOCLModuleNode(std::string data, + std::string fmt, + std::unordered_map fmap, + std::string source) + : OpenCLModuleNode(data, fmt, fmap, source) {} + const std::shared_ptr& GetGlobalWorkspace() final; +}; + +const std::shared_ptr& AOCLModuleNode::GetGlobalWorkspace() { + return cl::AOCLWorkspace::Global(); +} + +Module AOCLModuleCreate( + std::string data, + std::string fmt, + std::unordered_map fmap, + std::string source) { + std::shared_ptr n = + std::make_shared(data, fmt, fmap, source); + n->Init(); + return Module(n); +} + +Module AOCLModuleLoadFile(const std::string& file_name, + const std::string& format) { + std::string data; + std::unordered_map fmap; + std::string fmt = GetFileFormat(file_name, format); + std::string meta_file = GetMetaFilePath(file_name); + LoadBinaryFromFile(file_name, &data); + LoadMetaDataFromFile(meta_file, &fmap); + return AOCLModuleCreate(data, fmt, fmap, std::string()); +} + +TVM_REGISTER_GLOBAL("module.loadfile_aocx") +.set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = AOCLModuleLoadFile(args[0], args[1]); + }); + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/opencl/aocl/aocl_module.h b/src/runtime/opencl/aocl/aocl_module.h new file mode 100644 index 000000000000..83ddbdb358ce --- /dev/null +++ b/src/runtime/opencl/aocl/aocl_module.h @@ -0,0 +1,31 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * \file aocl_module.h + * \brief Execution handling of OpenCL kernels for AOCL + */ +#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_ +#define TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_ + +#include +#include +#include +#include +#include "../../meta_data.h" + +namespace tvm { +namespace runtime { +/*! + * \brief create a opencl module for AOCL from data. + * + * \param data The module data. + * \param fmt The format of the data, can be "aocx" + * \param fmap The map function information map of each function. + */ +Module AOCLModuleCreate( + std::string data, + std::string fmt, + std::unordered_map fmap, + std::string source); +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_ diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc index aa51f17d4021..3efd789513ba 100644 --- a/src/runtime/opencl/opencl_module.cc +++ b/src/runtime/opencl/opencl_module.cc @@ -191,7 +191,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w, program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err); OPENCL_CHECK_ERROR(err); } - } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") { + } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin" || fmt_ == "aocx") { const unsigned char* s = (const unsigned char *)data_.c_str(); size_t len = data_.length(); cl_int err; diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py index c0a568faca2d..0abefff02778 100644 --- a/tests/python/integration/test_ewise_fpga.py +++ b/tests/python/integration/test_ewise_fpga.py @@ -3,6 +3,7 @@ import os os.environ["XCL_EMULATION_MODE"] = "1" +os.environ["CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA"] = "1" @tvm.register_func def tvm_callback_vhls_postproc(code): @@ -43,6 +44,7 @@ def check_device(device, host="llvm"): if "AWS_PLATFORM" in os.environ: check_device("sdaccel -device=" + os.environ.get("AWS_PLATFORM")) + check_device("aocl 
-device=s5_ref -mattr=emulator") def test_multi_kernel(): # graph @@ -80,6 +82,7 @@ def check_device(device, host="llvm"): d.asnumpy(), a.asnumpy() * 2 + b.asnumpy(), rtol=1e-5) check_device("sdaccel") + check_device("aocl -device=s5_ref -mattr=emulator") if __name__ == "__main__":